# This code prepares the data for the empirical analysis contained in the paper
# "Identifying the sources of consumption variation" by M. Barigozzi and A. Moneta
#
# IMPORTANT INSTRUCTIONS:
# 
# The raw data (see the readme.txt file for instructions on how to download them) have to be saved in a folder called "datafes".
# The folder datafes must contain subfolders named after the year(s) to which the data refer, e.g. data referring to the year 1980 are saved in the folder "1980".
# 
# change accordingly the location of the raw data (folder "datafes"):
# e.g. "C:\\datafes\\2005-2006\\2005-06_dvhh_UKanon_v2.dta"
# 
# change accordingly the location of the folder "original_data" where the prepared data are saved 
# e.g. write.csv(Md,".../original_data/dat2005_2006def.csv" )
#
# change accordingly the location of the file "price_indices.csv" which contains the price indices for each expenditure category
# you need this file to deflate the expenditure data 
# e.g. P<-read.csv(".../price_indices.csv", dec =".", sep =";")
#
# Hint: use automatic "Find and Replace" to change these items. 
#
library(foreign)
#
##################################################  2005-2006 ################################
#



# 2005-2006: read the household file, keep the case number, the age of the
# head of the family, the number of members and the 15 expenditure variables,
# set missing values to 0, deflate the expenditures with the 2006 row of
# price_indices.csv and save the result as dat2005_2006def.csv.
D0 <- read.dta("C:\\datafes\\2005-2006\\2005-06_dvhh_UKanon_v2.dta")

# Output column name -> variable name in the raw file. The columns are selected
# explicitly rather than through attach()/detach(), so objects already present
# in the workspace cannot shadow a survey variable and nothing is left on the
# search path if the script stops half-way.
src <- c(
  caseno = "case",
  age = "p396p", # age of the head of the family
  nper = "a049", # number of members
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
# D0[...] stops with "undefined columns selected" if a variable is absent.
M <- do.call(cbind, as.list(D0[unname(src)]))
colnames(M) <- names(src)

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2006 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2006, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2006", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################

write.csv(Md,".../original_data/dat2005_2006def.csv" )

##############################################################################################

##################################################  2004-2005 ################################
rm(list=ls())
# 2004-2005: read the household file, keep the case number, the age of the
# head of the family, the number of members and the 15 expenditure variables,
# set missing values to 0, deflate the expenditures with the 2005 row of
# price_indices.csv and save the result as dat2004_2005def.csv.
D0 <- read.dta("C:\\datafes\\2004-2005\\2004-05_dvhh_ukanon_v2.dta")

# Output column name -> variable name in the raw file. Explicit selection
# replaces attach()/detach(): workspace objects cannot shadow a survey variable
# and nothing stays on the search path if the script stops half-way.
src <- c(
  caseno = "case",
  age = "p396p", # age of the head of the family
  nper = "a049", # number of members
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
# D0[...] stops with "undefined columns selected" if a variable is absent.
M <- do.call(cbind, as.list(D0[unname(src)]))
colnames(M) <- names(src)

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2005 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2005, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2005", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################

write.csv(Md,".../original_data/dat2004_2005def.csv" )


##############################################################################################

##################################################  2003-2004 ################################

rm(list=ls())

# 2003-2004: read the household file, keep the case number, the age of the
# head of the family, the number of members and the 15 expenditure variables,
# set missing values to 0, deflate the expenditures with the 2004 row of
# price_indices.csv and save the result as dat2003_2004def.csv.
D0 <- read.dta("C:\\datafes\\2003-2004\\2003-04_dvhh_equiv_ukanon.dta")

# Output column name -> variable name in the raw file. Explicit selection
# replaces attach()/detach(): workspace objects cannot shadow a survey variable
# and nothing stays on the search path if the script stops half-way.
src <- c(
  caseno = "case",
  age = "p396p", # age of the head of the family
  nper = "a049", # number of members
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
# D0[...] stops with "undefined columns selected" if a variable is absent.
M <- do.call(cbind, as.list(D0[unname(src)]))
colnames(M) <- names(src)

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2004 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2004, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2004", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################

write.csv(Md,".../original_data/dat2003_2004def.csv" )

##############################################################################################

##################################################  2002-2003 ################################

rm(list=ls())
# 2002-2003: read the household file, keep the case number, the age of the
# head of the family, the number of members and the 15 expenditure variables,
# set missing values to 0, deflate the expenditures with the 2003 row of
# price_indices.csv and save the result as dat2002_2003def.csv.
D0 <- read.dta("C:\\datafes\\2002-2003\\200203dvhhukanon.dta")

# Output column name -> variable name in the raw file. Explicit selection
# replaces attach()/detach(): workspace objects cannot shadow a survey variable
# and nothing stays on the search path if the script stops half-way.
src <- c(
  caseno = "case",
  age = "p396p", # age of the head of the family
  nper = "a049", # number of members
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
# D0[...] stops with "undefined columns selected" if a variable is absent.
M <- do.call(cbind, as.list(D0[unname(src)]))
colnames(M) <- names(src)

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2003 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2003, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2003", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat2002_2003def.csv" )

##############################################################################################

##################################################  2001-2002 ################################

rm(list=ls())
# 2001-2002: read the household file, keep the case number, the age of the
# head of the family, the number of members and the 15 expenditure variables,
# set missing values to 0, deflate the expenditures with the 2002 row of
# price_indices.csv and save the result as dat2001_2002def.csv.
D0 <- read.dta("C:\\datafes\\2001-2002\\20012dvhhukanon.dta")

# Output column name -> variable name in the raw file. Explicit selection
# replaces attach()/detach(): workspace objects cannot shadow a survey variable
# and nothing stays on the search path if the script stops half-way.
src <- c(
  caseno = "case",
  age = "p396p", # age of the head of the family
  nper = "a049", # number of members
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
# D0[...] stops with "undefined columns selected" if a variable is absent.
M <- do.call(cbind, as.list(D0[unname(src)]))
colnames(M) <- names(src)

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2002 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2002, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2002", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat2001_2002def.csv" )



 ##############################################################################################

##################################################  2000-2001 ################################

rm(list=ls())
# 2000-2001: combine the expenditure file (set24t) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 2001 row of price_indices.csv and save the
# result as dat2000_2001def.csv.
D0 <- read.dta("C:\\datafes\\2000-2001\\set24t.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\2000-2001\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\2000-2001\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("2000-2001: case numbers in set8.dta / set2.dta do not match set24t.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
# NOTE(review): p396 is multiplied by 1000 for this survey year only; presumably
# the variable is stored on a different scale in this file - confirm.
age <- D1$p396 * 1000
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24t. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2001 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2001, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2001", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat2000_2001def.csv" )



##############################################################################################

##################################################  1999-2000 ################################

rm(list=ls())

# 1999-2000: combine the expenditure file (set24t) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 2000 row of price_indices.csv and save the
# result as dat1999_2000def.csv.
D0 <- read.dta("C:\\datafes\\1999-2000\\set24t.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1999-2000\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\1999-2000\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1999-2000: case numbers in set8.dta / set2.dta do not match set24t.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$p396
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24t. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 2000 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 2000, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 2000", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1999_2000def.csv" )

##############################################################################################

##################################################  1998-1999 ################################


rm(list=ls())

# 1998-1999: combine the expenditure file (set24t) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 1999 row of price_indices.csv and save the
# result as dat1998_1999def.csv.
D0 <- read.dta("C:\\datafes\\1998-1999\\set24t.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1998-1999\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\1998-1999\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1998-1999: case numbers in set8.dta / set2.dta do not match set24t.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$p396
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24t. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "p550tp",
  housing_net = "p536tp",
  fuel_light_power = "p537t",
  food = "p538t",
  alcoholic_drink = "p539t",
  tobacco = "p540t",
  clothing_and_footwear = "p541t",
  household_goods = "p542t",
  household_services = "p543t",
  personal_goods_and_services = "p544t",
  motoring = "p545t",
  fares_and_other_travel = "p546t",
  leisure_goods = "p547t",
  leisure_services = "p548t",
  miscellaneous = "p549t"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1999 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1999, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1999", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1998_1999def.csv" )


##############################################################################################

##################################################  1997-1998 ################################


rm(list=ls())

# 1997-1998: combine the expenditure file (set24t) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 1998 row of price_indices.csv and save the
# result as dat1997_1998def.csv.
D0 <- read.dta("C:\\datafes\\1997-1998\\set24t.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1997-1998\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\1997-1998\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1997-1998: case numbers in set8.dta / set2.dta do not match set24t.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$p396
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24t (names carry an "xp"
# prefix in this survey year). Explicit selection replaces attach()/detach(),
# so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550tp",
  housing_net = "xp536tp",
  fuel_light_power = "xp537t",
  food = "xp538t",
  alcoholic_drink = "xp539t",
  tobacco = "xp540t",
  clothing_and_footwear = "xp541t",
  household_goods = "xp542t",
  household_services = "xp543t",
  personal_goods_and_services = "xp544t",
  motoring = "xp545t",
  fares_and_other_travel = "xp546t",
  leisure_goods = "xp547t",
  leisure_services = "xp548t",
  miscellaneous = "xp549t"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1998 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1998, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1998", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1997_1998def.csv" )



##############################################################################################

##################################################  1996-1997 ################################
rm(list=ls())
# 1996-1997: expenditures are spread over two files (set24 and set24c) and are
# added up variable by variable. The result is combined with the age of the
# head of the family (set8) and the number of members (set2), missing values
# are set to 0, expenditures are deflated with the 1997 row of
# price_indices.csv and the result is saved as dat1996_1997def.csv.
D1 <- read.dta("C:\\datafes\\1996-1997\\set24.dta")
D2 <- read.dta("C:\\datafes\\1996-1997\\set24c.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure files, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D3 <- read.dta("C:\\datafes\\1996-1997\\set8.dta")[, c("caseno", "p396")]
D4 <- read.dta("C:\\datafes\\1996-1997\\set2.dta")[, c("caseno", "a049")]

# All four files are combined row by row, so their case numbers must line up
# exactly. A mismatch would add up or attach values of different households,
# hence the script stops instead of only printing a message.
for (aux in list(D2, D3, D4)) {
  if (length(aux$caseno) != length(D1$caseno) ||
      !isTRUE(all(aux$caseno == D1$caseno))) {
    stop("1996-1997: case numbers in set24c.dta / set8.dta / set2.dta do not match set24.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D3$p396
# we find number of members
nper <- D4$a049

caseno <- D1[, "caseno"]

# Variable in set24 and its counterpart in set24c, in output column order.
# NOTE(review): household services pairs "xp543p" with "xp543c" (no "p"
# suffix), unlike xp550p/xp550cp and xp536p/xp536cp - confirm with the codebook.
vars_main <- c("xp550p", "xp536p", "xp537", "xp538", "xp539", "xp540", "xp541",
               "xp542", "xp543p", "xp544", "xp545", "xp546", "xp547", "xp548",
               "xp549")
vars_c <- c("xp550cp", "xp536cp", "xp537c", "xp538c", "xp539c", "xp540c",
            "xp541c", "xp542c", "xp543c", "xp544c", "xp545c", "xp546c",
            "xp547c", "xp548c", "xp549c")
# Element-wise sum of each pair; an NA in either file gives NA (set to 0 below).
spend <- do.call(cbind, Map(`+`, as.list(D1[vars_main]), as.list(D2[vars_c])))

M <- cbind(caseno, age, nper, spend)
colnames(M)<-c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
"food", "alcoholic_drink", "tobacco", "clothing_and_footwear" , "household_goods",
"household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
"leisure_services", "miscellaneous")

# Missing values are treated as zero.
M[is.na(M)] <- 0

#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1997 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1997, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1997", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1996_1997def.csv" )

##############################################################################################

##################################################  1995-1996 ################################
rm(list=ls())

# 1995-1996: combine the expenditure file (set24t) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 1996 row of price_indices.csv and save the
# result as dat1995_1996def.csv.
D0 <- read.dta("C:\\datafes\\1995-1996\\set24t.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1995-1996\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\1995-1996\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1995-1996: case numbers in set8.dta / set2.dta do not match set24t.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$p396
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24t. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550tp",
  housing_net = "xp536tp",
  fuel_light_power = "xp537t",
  food = "xp538t",
  alcoholic_drink = "xp539t",
  tobacco = "xp540t",
  clothing_and_footwear = "xp541t",
  household_goods = "xp542t",
  household_services = "xp543tp",
  personal_goods_and_services = "xp544t",
  motoring = "xp545t",
  fares_and_other_travel = "xp546t",
  leisure_goods = "xp547t",
  leisure_services = "xp548t",
  miscellaneous = "xp549t"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1996 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1996, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1996", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1995_1996def.csv" )

##############################################################################################

##################################################  1994-1995 ################################
rm(list=ls())

# 1994-1995: combine the expenditure file (set24) with the age of the head of
# the family (set8) and the number of members (set2), set missing values to 0,
# deflate the expenditures with the 1995 row of price_indices.csv and save the
# result as dat1994_1995def.csv.
D0 <- read.dta("C:\\datafes\\1994-1995\\set24.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1994-1995\\set8.dta")[, c("caseno", "p396")]
D2 <- read.dta("C:\\datafes\\1994-1995\\set2.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1994-1995: case numbers in set8.dta / set2.dta do not match set24.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$p396
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in set24. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550p",
  housing_net = "xp536p",
  fuel_light_power = "xp537",
  food = "xp538",
  alcoholic_drink = "xp539",
  tobacco = "xp540",
  clothing_and_footwear = "xp541",
  household_goods = "xp542",
  household_services = "xp543",
  personal_goods_and_services = "xp544",
  motoring = "xp545",
  fares_and_other_travel = "xp546",
  leisure_goods = "xp547",
  leisure_services = "xp548",
  miscellaneous = "xp549"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1995 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1995, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1995", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1994_1995def.csv" )

##############################################################################################

##################################################  1993-1994 ################################
rm(list=ls())

# 1993-1994: combine the expenditure file (hmisc) with the age of the head of
# the family (hces) and the number of members (hcntpers), set missing values to
# 0, deflate the expenditures with the 1994 row of price_indices.csv and save
# the result as dat1993_1994def.csv.
D0 <- read.dta("C:\\datafes\\1993-1994\\hmisc.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1993-1994\\hces.dta")[, c("caseno", "a180")]
D2 <- read.dta("C:\\datafes\\1993-1994\\hcntpers.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1993-1994: case numbers in hces.dta / hcntpers.dta do not match hmisc.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$a180
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in hmisc. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550",
  housing_net = "xp536",
  fuel_light_power = "xp537",
  food = "xp538",
  alcoholic_drink = "xp539",
  tobacco = "xp540",
  clothing_and_footwear = "xp541",
  household_goods = "xp542",
  household_services = "xp543",
  personal_goods_and_services = "xp544",
  motoring = "xp545",
  fares_and_other_travel = "xp546",
  leisure_goods = "xp547",
  leisure_services = "xp548",
  miscellaneous = "xp549"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1994 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1994, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1994", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1993_1994def.csv" )

##############################################################################################

##################################################  1993 ################################
rm(list=ls())

# 1993: combine the expenditure file (hmisc) with the age of the head of the
# family (hces) and the number of members (hcntpers), set missing values to 0,
# deflate the expenditures with the 1993 row of price_indices.csv and save the
# result as dat1993def.csv.
D0 <- read.dta("C:\\datafes\\1993\\hmisc.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1993\\hces.dta")[, c("caseno", "a180")]
D2 <- read.dta("C:\\datafes\\1993\\hcntpers.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1993: case numbers in hces.dta / hcntpers.dta do not match hmisc.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$a180
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in hmisc. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550",
  housing_net = "xp536",
  fuel_light_power = "xp537",
  food = "xp538",
  alcoholic_drink = "xp539",
  tobacco = "xp540",
  clothing_and_footwear = "xp541",
  household_goods = "xp542",
  household_services = "xp543",
  personal_goods_and_services = "xp544",
  motoring = "xp545",
  fares_and_other_travel = "xp546",
  leisure_goods = "xp547",
  leisure_services = "xp548",
  miscellaneous = "xp549"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1993 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1993, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1993", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1993def.csv" )

##############################################################################################

##################################################  1992 ################################
rm(list=ls())

# 1992: combine the expenditure file (hmisc) with the age of the head of the
# family (hces) and the number of members (hcntpers), set missing values to 0,
# deflate the expenditures with the 1992 row of price_indices.csv and save the
# result as dat1992def.csv.
D0 <- read.dta("C:\\datafes\\1992\\hmisc.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1992\\hces.dta")[, c("caseno", "a180")]
D2 <- read.dta("C:\\datafes\\1992\\hcntpers.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1992: case numbers in hces.dta / hcntpers.dta do not match hmisc.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$a180
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in hmisc. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550",
  housing_net = "xp536",
  fuel_light_power = "xp537",
  food = "xp538",
  alcoholic_drink = "xp539",
  tobacco = "xp540",
  clothing_and_footwear = "xp541",
  household_goods = "xp542",
  household_services = "xp543",
  personal_goods_and_services = "xp544",
  motoring = "xp545",
  fares_and_other_travel = "xp546",
  leisure_goods = "xp547",
  leisure_services = "xp548",
  miscellaneous = "xp549"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1992 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1992, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1992", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1992def.csv" )

##############################################################################################

##################################################  1991 ################################
rm(list=ls())

# 1991: combine the expenditure file (hmisc) with the age of the head of the
# family (hces) and the number of members (hcntpers), set missing values to 0,
# deflate the expenditures with the 1991 row of price_indices.csv and save the
# result as dat1991def.csv.
D0 <- read.dta("C:\\datafes\\1991\\hmisc.dta")

# The auxiliary files are read from the same "datafes" year folder as the
# expenditure file, following the instructions at the top of this file (they
# used to point to a machine-specific T: drive).
D1 <- read.dta("C:\\datafes\\1991\\hces.dta")[, c("caseno", "b264")]
D2 <- read.dta("C:\\datafes\\1991\\hcntpers.dta")[, c("caseno", "a049")]

# The three files are combined row by row, so their case numbers must line up
# exactly. A mismatch would attach age / household size to the wrong household,
# hence the script stops instead of only printing a message.
for (aux in list(D1, D2)) {
  if (length(aux$caseno) != length(D0$caseno) ||
      !isTRUE(all(aux$caseno == D0$caseno))) {
    stop("1991: case numbers in hces.dta / hcntpers.dta do not match hmisc.dta",
         call. = FALSE)
  }
}

# we find the age of the head of the family
age <- D1$b264
# we find number of members
nper <- D2$a049

# Output column name -> expenditure variable in hmisc. Explicit selection
# replaces attach()/detach(), so workspace objects cannot shadow a variable.
src <- c(
  total_expenditure = "xp550",
  housing_net = "xp536",
  fuel_light_power = "xp537",
  food = "xp538",
  alcoholic_drink = "xp539",
  tobacco = "xp540",
  clothing_and_footwear = "xp541",
  household_goods = "xp542",
  household_services = "xp543",
  personal_goods_and_services = "xp544",
  motoring = "xp545",
  fares_and_other_travel = "xp546",
  leisure_goods = "xp547",
  leisure_services = "xp548",
  miscellaneous = "xp549"
)
M <- cbind(D0$caseno, age, nper, do.call(cbind, as.list(D0[unname(src)])))
colnames(M) <- c("caseno", "age", "nper", names(src))

# Missing values are treated as zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
# Columns 2:15 of the 1991 row, rescaled from index points (base 100) to ratios.
pri <- P[P[, 1] == 1991, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1991", call. = FALSE)
}
# NOTE(review): the deflator for "miscellaneous" is derived from the other 14
# values with the original formula, kept unchanged - confirm against the paper.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the 15 expenditure columns (4:18) by its own deflator.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18, drop = FALSE], 2, pri, "/")
########################################
write.csv(Md,".../original_data/dat1991def.csv" )

##############################################################################################

##################################################  1990 ################################
rm(list=ls())

D0<- read.dta("C:\\datafes\\1990\\hmisc.dta")

######
# we find the age of the head of the family
D1<-read.dta("T:\\Hiwi_2\\Michael\\1990\\hces.dta")[,c("caseno","b264")]
if (any((D0$caseno == D1$caseno)==FALSE)){print("ERROR")}    #change this
age<-D1$b264

# we find number of members 
D2<-read.dta("T:\\Hiwi_2\\Michael\\1990\\hcntpers.dta")[,c("caseno", "a049")]
if (any((D0$caseno == D2$caseno)==FALSE)){print("ERROR")}    #change this
nper<-D2$a049


D0 <- read.dta("C:\\datafes\\1990\\hmisc.dta")[,c("caseno", "xp508", "xp377")]
D1 <- read.dta("C:\\datafes\\1990\\hexpend1.dta")[,c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1990\\hexpend2.dta")[,c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1990\\hexpend3.dta")[,c("caseno", "xp370", "xp371")]
D4 <- read.dta("C:\\datafes\\1990\\hexpend4.dta")[,c("caseno", "xp405", "xp406")]
D5 <- read.dta("C:\\datafes\\1990\\hexpend5.dta")[,c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1990\\hexpend6.dta")[,c("caseno", "xp400")]
D7 <- read.dta("C:\\datafes\\1990\\hexpend7.dta")[,c("caseno", "xp402")]
D8 <- read.dta("C:\\datafes\\1990\\hfuel.dta")[,c("caseno", "xp368")]    
D9 <- read.dta("C:\\datafes\\1990\\hvehicle.dta")[,c("caseno", "xp403", "xp404")] 
D10 <- read.dta("C:\\datafes\\1990\\hservice.dta")[,c("caseno", "xp401")] 

tot_exp <- cbind(D0$caseno, D0$xp508) # total expenditure (not retro recall codes)    
 
M1 <- as.matrix(D1)
colnames(M1)<-names(D1)
M2 <- as.matrix(D2)
colnames(M2)<-names(D2)
M3 <- as.matrix(D3)
colnames(M3)<-names(D3)
M4 <- as.matrix(D4)
colnames(M4)<-names(D4)
M5 <- as.matrix(D5)
colnames(M5)<-names(D5)
M6 <- as.matrix(D6)
colnames(M6)<-names(D6)
M7 <- as.matrix(D7)
colnames(M7)<-names(D7)
M8 <- as.matrix(D8)
colnames(M8)<-names(D8)
M9 <- as.matrix(D9)
colnames(M9)<-names(D9)
M10 <- as.matrix(D10)
colnames(M10)<-names(D10)

listM<-list( M1, M2, M3, M4, M5, M6, M7, M8, M9, M10)
lg<-1:length(listM)
for (i in 1:length(listM)){
lg[i]<-nrow(listM[[i]])
}
le<-nrow(tot_exp)
if (max(lg)>le){print("ACHTUNG")} 
ll<-length(listM)
nlist<-listM

for (i in 1:(ll)){
nlist[[i]]<-matrix(1:(le*(ncol(listM[[i]]))), ncol=ncol(listM[[i]]))
colnames(nlist[[i]]) <-colnames(listM[[i]])
nlist[[i]][,1]<-tot_exp[,1]
for(j in (1:le)){
if (any(tot_exp[j,1]==listM[[i]][,1])){
nlist[[i]][j,]<-listM[[i]][which(tot_exp[j,1]==listM[[i]][,1]),]
}
else{nlist[[i]][j,]<-c(tot_exp[j,1],rep(0, ncol(listM[[i]])-1))}
}
}


# Assemble the 1990 household table. The aligned category columns are placed
# straight into M; the column order has to agree with the names assigned
# below and with the deflators applied to columns 4-18 afterwards.
M <- cbind(
  D0$caseno,                # household identifier
  age,
  nper,
  D0$xp508,                 # total expenditure
  nlist[[1]][, "xp367"],    # housing (net)
  nlist[[8]][, "xp368"],    # fuel, light and power
  nlist[[2]][, "xp369"],    # food
  nlist[[3]][, "xp370"],    # alcoholic drink
  nlist[[3]][, "xp371"],    # tobacco
  nlist[[5]][, "xp372"],    # clothing and footwear
  nlist[[6]][, "xp400"],    # household goods
  nlist[[10]][, "xp401"],   # household services
  nlist[[7]][, "xp402"],    # personal goods and services
  nlist[[9]][, "xp403"],    # motoring
  nlist[[9]][, "xp404"],    # fares and other travel
  nlist[[4]][, "xp405"],    # leisure goods
  nlist[[4]][, "xp406"],    # leisure services
  D0$xp377                  # miscellaneous
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero.
M[is.na(M)] <- 0
#### price deflation #################
# The price file has the year in column 1; columns 2-15 of the 1990 row are
# divided by 100 and used as deflators for expenditure columns 4-17 of M.
price_tab <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- price_tab[price_tab[, 1] == 1990, 2:15] / 100
# Deflator for "miscellaneous", derived from the other indices.
# NOTE(review): this evaluates as pri[1] - (sum(pri[2:14]) / 14) -- confirm
# that this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Repeat the 15 deflators on every row so they line up with columns 4-18.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1990def.csv")

##############################################################################################

##################################################  1989 ################################
rm(list=ls())

# Full hmisc table; here only its caseno column is used, as the reference
# household order for the two consistency checks below.
D0 <- read.dta("C:\\datafes\\1989\\hmisc.dta")

# Age of the head of the family (b264). Read from the same "datafes" folder
# as every other 1989 file, so that a single find-and-replace of the data
# location covers it.
D1 <- read.dta("C:\\datafes\\1989\\hces.dta")[, c("caseno", "b264")]
# The rows must be in the same household order as hmisc; a mismatch is only
# reported, the script carries on.
if (!all(D0$caseno == D1$caseno)) {
  print("ERROR")
}
age <- D1$b264

# Number of household members (a049), with the same order check.
D2 <- read.dta("C:\\datafes\\1989\\hcntpers.dta")[, c("caseno", "a049")]
if (!all(D0$caseno == D2$caseno)) {
  print("ERROR")
}
nper <- D2$a049


# hmisc supplies the household list (caseno), total expenditure (xp508) and
# xp377; each of the other files contributes one or two expenditure columns
# keyed by caseno.
D0 <- read.dta("C:\\datafes\\1989\\hmisc.dta")[, c("caseno", "xp508", "xp377")]
D1 <- read.dta("C:\\datafes\\1989\\hexpend1.dta")[, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1989\\hexpend2.dta")[, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1989\\hexpend3.dta")[, c("caseno", "xp370", "xp371")]
D4 <- read.dta("C:\\datafes\\1989\\hexpend4.dta")[, c("caseno", "xp405", "xp406")]
D5 <- read.dta("C:\\datafes\\1989\\hexpend5.dta")[, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1989\\hexpend6.dta")[, c("caseno", "xp400")]
D7 <- read.dta("C:\\datafes\\1989\\hexpend7.dta")[, c("caseno", "xp402")]
D8 <- read.dta("C:\\datafes\\1989\\hfuel.dta")[, c("caseno", "xp368")]
D9 <- read.dta("C:\\datafes\\1989\\hvehicle.dta")[, c("caseno", "xp403", "xp404")]
D10 <- read.dta("C:\\datafes\\1989\\hservice.dta")[, c("caseno", "xp401")]

# Master list of households: caseno in column 1, total expenditure in column 2.
tot_exp <- cbind(D0$caseno, D0$xp508) # total expenditure (not retro recall codes)

# One numeric matrix per expenditure table; as.matrix() keeps the column names.
listM <- lapply(list(D1, D2, D3, D4, D5, D6, D7, D8, D9, D10), as.matrix)

le <- nrow(tot_exp)
# Diagnostic only: a table with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Bring every table into the household order of hmisc. match() performs the
# caseno lookup in one vectorised step (the first matching row is used);
# households missing from a table get their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})


# Assemble the 1989 household table. The aligned category columns are placed
# straight into M; the column order has to agree with the names assigned
# below and with the deflators applied to columns 4-18 afterwards.
M <- cbind(
  D0$caseno,                # household identifier
  age,
  nper,
  D0$xp508,                 # total expenditure
  nlist[[1]][, "xp367"],    # housing (net)
  nlist[[8]][, "xp368"],    # fuel, light and power
  nlist[[2]][, "xp369"],    # food
  nlist[[3]][, "xp370"],    # alcoholic drink
  nlist[[3]][, "xp371"],    # tobacco
  nlist[[5]][, "xp372"],    # clothing and footwear
  nlist[[6]][, "xp400"],    # household goods
  nlist[[10]][, "xp401"],   # household services
  nlist[[7]][, "xp402"],    # personal goods and services
  nlist[[9]][, "xp403"],    # motoring
  nlist[[9]][, "xp404"],    # fares and other travel
  nlist[[4]][, "xp405"],    # leisure goods
  nlist[[4]][, "xp406"],    # leisure services
  D0$xp377                  # miscellaneous
)

colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero.
M[is.na(M)] <- 0
#### price deflation #################
# The price file has the year in column 1; columns 2-15 of the 1989 row are
# divided by 100 and used as deflators for expenditure columns 4-17 of M.
price_tab <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- price_tab[price_tab[, 1] == 1989, 2:15] / 100
# Deflator for "miscellaneous", derived from the other indices.
# NOTE(review): this evaluates as pri[1] - (sum(pri[2:14]) / 14) -- confirm
# that this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Repeat the 15 deflators on every row so they line up with columns 4-18.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1989def.csv")

##############################################################################################

##################################################  1988 ################################
rm(list=ls())

# Full hmisc table; here only its caseno column is used, as the reference
# household order for the two consistency checks below.
D0 <- read.dta("C:\\datafes\\1988\\hmisc.dta")

# Age of the head of the family (b264). Read from the same "datafes" folder
# as every other 1988 file, so that a single find-and-replace of the data
# location covers it.
D1 <- read.dta("C:\\datafes\\1988\\hces.dta")[, c("caseno", "b264")]
# The rows must be in the same household order as hmisc; a mismatch is only
# reported, the script carries on.
if (!all(D0$caseno == D1$caseno)) {
  print("ERROR")
}
age <- D1$b264

# Number of household members (a049), with the same order check.
D2 <- read.dta("C:\\datafes\\1988\\hcntpers.dta")[, c("caseno", "a049")]
if (!all(D0$caseno == D2$caseno)) {
  print("ERROR")
}
nper <- D2$a049
#

# hmisc supplies the household list (caseno), total expenditure (xp508) and
# xp377; each of the other files contributes one or two expenditure columns
# keyed by caseno.
D0 <- read.dta("C:\\datafes\\1988\\hmisc.dta")[, c("caseno", "xp508", "xp377")]
D1 <- read.dta("C:\\datafes\\1988\\hexpend1.dta")[, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1988\\hexpend2.dta")[, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1988\\hexpend3.dta")[, c("caseno", "xp370", "xp371")]
D4 <- read.dta("C:\\datafes\\1988\\hexpend4.dta")[, c("caseno", "xp405", "xp406")]
D5 <- read.dta("C:\\datafes\\1988\\hexpend5.dta")[, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1988\\hexpend6.dta")[, c("caseno", "xp400")]
D7 <- read.dta("C:\\datafes\\1988\\hexpend7.dta")[, c("caseno", "xp402")]
D8 <- read.dta("C:\\datafes\\1988\\hfuel.dta")[, c("caseno", "xp368")]
D9 <- read.dta("C:\\datafes\\1988\\hvehicle.dta")[, c("caseno", "xp403", "xp404")]
D10 <- read.dta("C:\\datafes\\1988\\hservice.dta")[, c("caseno", "xp401")]

# Master list of households: caseno in column 1, total expenditure in column 2.
tot_exp <- cbind(D0$caseno, D0$xp508) # total expenditure (not retro recall codes)

# One numeric matrix per expenditure table; as.matrix() keeps the column names.
listM <- lapply(list(D1, D2, D3, D4, D5, D6, D7, D8, D9, D10), as.matrix)

le <- nrow(tot_exp)
# Diagnostic only: a table with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Bring every table into the household order of hmisc. match() performs the
# caseno lookup in one vectorised step (the first matching row is used);
# households missing from a table get their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})


# Assemble the 1988 household table. The aligned category columns are placed
# straight into M; the column order has to agree with the names assigned
# below and with the deflators applied to columns 4-18 afterwards.
M <- cbind(
  D0$caseno,                # household identifier
  age,
  nper,
  D0$xp508,                 # total expenditure
  nlist[[1]][, "xp367"],    # housing (net)
  nlist[[8]][, "xp368"],    # fuel, light and power
  nlist[[2]][, "xp369"],    # food
  nlist[[3]][, "xp370"],    # alcoholic drink
  nlist[[3]][, "xp371"],    # tobacco
  nlist[[5]][, "xp372"],    # clothing and footwear
  nlist[[6]][, "xp400"],    # household goods
  nlist[[10]][, "xp401"],   # household services
  nlist[[7]][, "xp402"],    # personal goods and services
  nlist[[9]][, "xp403"],    # motoring
  nlist[[9]][, "xp404"],    # fares and other travel
  nlist[[4]][, "xp405"],    # leisure goods
  nlist[[4]][, "xp406"],    # leisure services
  D0$xp377                  # miscellaneous
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero.
M[is.na(M)] <- 0
#### price deflation #################
# The price file has the year in column 1; columns 2-15 of the 1988 row are
# divided by 100 and used as deflators for expenditure columns 4-17 of M.
price_tab <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- price_tab[price_tab[, 1] == 1988, 2:15] / 100
# Deflator for "miscellaneous", derived from the other indices.
# NOTE(review): this evaluates as pri[1] - (sum(pri[2:14]) / 14) -- confirm
# that this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Repeat the 15 deflators on every row so they line up with columns 4-18.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1988def.csv")

##############################################################################################

##################################################  1987 ################################
rm(list=ls())

# Full hmisc table; here only its caseno column is used, as the reference
# household order for the two consistency checks below.
D0 <- read.dta("C:\\datafes\\1987\\hmisc.dta")

# Age of the head of the family (b264). Read from the same "datafes" folder
# as every other 1987 file, so that a single find-and-replace of the data
# location covers it.
D1 <- read.dta("C:\\datafes\\1987\\hces.dta")[, c("caseno", "b264")]
# The rows must be in the same household order as hmisc; a mismatch is only
# reported, the script carries on.
if (!all(D0$caseno == D1$caseno)) {
  print("ERROR")
}
age <- D1$b264

# Number of household members (a049), with the same order check.
D2 <- read.dta("C:\\datafes\\1987\\hcntpers.dta")[, c("caseno", "a049")]
if (!all(D0$caseno == D2$caseno)) {
  print("ERROR")
}
nper <- D2$a049
#

# hmisc supplies the household list (caseno), total expenditure (xp378 in
# this year) and xp377; each of the other files contributes one or two
# expenditure columns keyed by caseno.
D0 <- read.dta("C:\\datafes\\1987\\hmisc.dta")[, c("caseno", "xp378", "xp377")]
D1 <- read.dta("C:\\datafes\\1987\\hexpend1.dta")[, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1987\\hexpend2.dta")[, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1987\\hexpend3.dta")[, c("caseno", "xp370", "xp371")]
D4 <- read.dta("C:\\datafes\\1987\\hexpend4.dta")[, c("caseno", "xp405", "xp406")]
D5 <- read.dta("C:\\datafes\\1987\\hexpend5.dta")[, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1987\\hexpend6.dta")[, c("caseno", "xp400")]
D7 <- read.dta("C:\\datafes\\1987\\hexpend7.dta")[, c("caseno", "xp402")]
D8 <- read.dta("C:\\datafes\\1987\\hfuel.dta")[, c("caseno", "xp368")]
D9 <- read.dta("C:\\datafes\\1987\\hvehicle.dta")[, c("caseno", "xp403", "xp404")]
D10 <- read.dta("C:\\datafes\\1987\\hservice.dta")[, c("caseno", "xp401")]

# Master list of households: caseno in column 1, total expenditure in column 2.
tot_exp <- cbind(D0$caseno, D0$xp378) # total expenditure (not retro recall codes)

# One numeric matrix per expenditure table; as.matrix() keeps the column names.
listM <- lapply(list(D1, D2, D3, D4, D5, D6, D7, D8, D9, D10), as.matrix)

le <- nrow(tot_exp)
# Diagnostic only: a table with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Bring every table into the household order of hmisc. match() performs the
# caseno lookup in one vectorised step (the first matching row is used);
# households missing from a table get their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})


# Assemble the 1987 household table. The aligned category columns are placed
# straight into M; the column order has to agree with the names assigned
# below and with the deflators applied to columns 4-18 afterwards.
M <- cbind(
  D0$caseno,                # household identifier
  age,
  nper,
  D0$xp378,                 # total expenditure
  nlist[[1]][, "xp367"],    # housing (net)
  nlist[[8]][, "xp368"],    # fuel, light and power
  nlist[[2]][, "xp369"],    # food
  nlist[[3]][, "xp370"],    # alcoholic drink
  nlist[[3]][, "xp371"],    # tobacco
  nlist[[5]][, "xp372"],    # clothing and footwear
  nlist[[6]][, "xp400"],    # household goods
  nlist[[10]][, "xp401"],   # household services
  nlist[[7]][, "xp402"],    # personal goods and services
  nlist[[9]][, "xp403"],    # motoring
  nlist[[9]][, "xp404"],    # fares and other travel
  nlist[[4]][, "xp405"],    # leisure goods
  nlist[[4]][, "xp406"],    # leisure services
  D0$xp377                  # miscellaneous
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero.
M[is.na(M)] <- 0
#### price deflation #################
# The price file has the year in column 1; columns 2-15 of the 1987 row are
# divided by 100 and used as deflators for expenditure columns 4-17 of M.
price_tab <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- price_tab[price_tab[, 1] == 1987, 2:15] / 100
# Deflator for "miscellaneous", derived from the other indices.
# NOTE(review): this evaluates as pri[1] - (sum(pri[2:14]) / 14) -- confirm
# that this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Repeat the 15 deflators on every row so they line up with columns 4-18.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1987def.csv")

##############################################################################################

##################################################  1986 ################################
rm(list=ls())

# Full hmisc table; here only its caseno column is used, as the reference
# household order for the two consistency checks below.
D0 <- read.dta("C:\\datafes\\1986\\hmisc.dta")

# Age of the head of the family (b264). Read from the same "datafes" folder
# as every other 1986 file, so that a single find-and-replace of the data
# location covers it.
D1 <- read.dta("C:\\datafes\\1986\\hces.dta")[, c("caseno", "b264")]
# The rows must be in the same household order as hmisc; a mismatch is only
# reported, the script carries on.
if (!all(D0$caseno == D1$caseno)) {
  print("ERROR")
}
age <- D1$b264

# Number of household members (a049), with the same order check.
D2 <- read.dta("C:\\datafes\\1986\\hcntpers.dta")[, c("caseno", "a049")]
if (!all(D0$caseno == D2$caseno)) {
  print("ERROR")
}
nper <- D2$a049
#######


# First pass over the 1986 files: the six categories that are available as
# ready-made totals (xp367-xp372). The remaining categories are built from
# detailed codes further below.
D0 <- read.dta("C:\\datafes\\1986\\hmisc.dta")[, c("caseno", "xp378", "xp377")]
D1 <- read.dta("C:\\datafes\\1986\\hexpend1.dta")[, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1986\\hexpend2.dta")[, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1986\\hexpend3.dta")[, c("caseno", "xp370")]
D4 <- read.dta("C:\\datafes\\1986\\hexpend4.dta")[, c("caseno", "xp371")]
D5 <- read.dta("C:\\datafes\\1986\\hexpend5.dta")[, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1986\\hfuel.dta")[, c("caseno", "xp368")]

# Master list of households: caseno in column 1, total expenditure in column 2.
tot_exp <- cbind(D0$caseno, D0$xp378) # total expenditure (not retro recall codes)

# One numeric matrix per expenditure table; as.matrix() keeps the column names.
listM <- lapply(list(D1, D2, D3, D4, D5, D6), as.matrix)

le <- nrow(tot_exp)
# Diagnostic only: a table with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Bring every table into the household order of hmisc. match() performs the
# caseno lookup in one vectorised step (the first matching row is used);
# households missing from a table get their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})

# Keep the six category vectors now: nlist is rebuilt in the second pass.
hous <- nlist[[1]][, "xp367"]
fuel <- nlist[[6]][, "xp368"]
food <- nlist[[2]][, "xp369"]
alc <- nlist[[3]][, "xp370"]
tob <- nlist[[4]][, "xp371"]
clot <- nlist[[5]][, "xp372"]



###################

# Second pass over the 1986 files: every household-level file is read in
# full, aligned with the household order of hmisc, and bound side by side
# into MM so that individual expenditure codes can be looked up by name.
D0 <- read.dta("C:\\datafes\\1986\\hmisc.dta")

tot_exp <- cbind(D0$caseno, D0$xp378) # taking total expenditure

#####################################################################

raw_files <- c(
  "C:\\datafes\\1986\\hexpend1.dta",
  "C:\\datafes\\1986\\hexpend2.dta",
  "C:\\datafes\\1986\\hexpend3.dta",
  "C:\\datafes\\1986\\hexpend4.dta",
  "C:\\datafes\\1986\\hexpend5.dta",
  "C:\\datafes\\1986\\hexpend6.dta",
  "C:\\datafes\\1986\\hexpend7.dta",
  "C:\\datafes\\1986\\hfuel.dta",
  "C:\\datafes\\1986\\hhead.dta",
  "C:\\datafes\\1986\\hhousing.dta",
  "C:\\datafes\\1986\\hincome1.dta",
  "C:\\datafes\\1986\\hincome2.dta",
  "C:\\datafes\\1986\\hmisc.dta",
  "C:\\datafes\\1986\\hothers.dta",
  "C:\\datafes\\1986\\hservice.dta",
  "C:\\datafes\\1986\\hvehicle.dta"
)

# One matrix per file; as.matrix() keeps the column names. A file containing
# a non-numeric column yields a character matrix, which is why the values
# taken from MM are converted with as.numeric() where they are used.
listM <- lapply(raw_files, function(path) as.matrix(read.dta(path)))

le <- nrow(tot_exp)
# Diagnostic only: a file with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Align every file with the household order of hmisc, using column 1 of
# each file as the household key. match() does the lookup in one vectorised
# step (the first matching row is used); households missing from a file get
# their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})

# All aligned files side by side, in the order of raw_files.
MM <- do.call(cbind, nlist)

# Per-household total of the listed expenditure codes. Each code is a column
# of MM; the columns are converted to numeric and added in the order given.
sum_codes <- function(codes) {
  Reduce(`+`, lapply(codes, function(code) as.numeric(MM[, code])))
}

# (7) Household goods
# NOTE(review): xd644 is listed here and also under (9) -- confirm that the
# double use is intended.
c86_7 <- sum_codes(c(
  "xd401", "xd404", "xd405", "xd407", "xd411", "xd412", "xd413", "xd417",
  "xd419", "xd423", "xd431", "xd434", "xd436", "xd623", "xd644", "xd731",
  "xd732", "xd733", "xd734", "xd741", "xd746", "xd749", "xd750", "xd786"
))
cons7 <- c86_7

# (8) Domestic and paid services, postage, phone, subscriptions
c86_8 <- sum_codes(c(
  "xd771", "xd781", "xd790", "xd791", "xd782", "xd788", "xd751", "xd414",
  "xd752", "xd796", "xd797", "xd770", "xd805", "xd807", "xd806", "xd799"
))
cons8 <- c86_8

# (9) Personal goods and services
c86_9 <- sum_codes(c(
  "xd745", "xd624", "xd622", "xd643", "xd644", "xd642", "xd625", "xd773",
  "xd621", "xd775", "xd611", "xd612", "xd793"
))
cons9 <- c86_9

# (10) Motoring expenditure
c86_10 <- sum_codes(c(
  "xd501", "xd502", "xd503", "xd548", "xd549", "xd509", "xd510", "xd541",
  "xd542", "xd546", "xd512", "xd545"
))
cons10 <- c86_10

# (11) Travel and non-motor vehicle expenses
c86_11 <- sum_codes(c(
  "xd505", "xd508", "xd511", "xd551", "xd552", "xd553", "xd554", "xd555",
  "xd556", "xd557", "xd559"
))
cons11 <- c86_11

# (12) Television, audio, books, stationery, leisure goods
c86_12 <- sum_codes(c(
  "xd409", "xd420", "xd410", "xd601", "xd603", "xd604", "xd784", "xd422",
  "xd641", "xd722", "xd723", "xd721", "xd648", "xd631", "xd634", "xd632"
))
cons12 <- c86_12

# (13) Entertainment, education, holidays, betting
c86_13 <- sum_codes(c(
  "xd761", "xd763", "xd762", "xd764", "xd766", "xd767", "xd768", "xd760",
  "xd756", "xd757", "xd759", "xd811", "xd812", "xd813", "xd821", "xd814",
  "xd823", "xd822", "xd824"
))
cons13 <- c86_13

####################


# Assemble the 1986 household table. hous..clot come from the first pass
# over the files, c86_7..c86_13 are the category totals built from the
# detailed codes. The column order has to agree with the names assigned
# below and with the deflators applied to columns 4-18 afterwards.
M <- cbind(
  D0$caseno,   # household identifier
  age,
  nper,
  D0$xp378,    # total expenditure
  hous, fuel, food, alc, tob, clot,
  c86_7,       # household goods
  c86_8,       # household services
  c86_9,       # personal goods and services
  c86_10,      # motoring
  c86_11,      # fares and other travel
  c86_12,      # leisure goods
  c86_13,      # leisure services
  D0$xp377     # miscellaneous
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero.
M[is.na(M)] <- 0
#### price deflation #################
# The price file has the year in column 1; columns 2-15 of the 1986 row are
# divided by 100 and used as deflators for expenditure columns 4-17 of M.
price_tab <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- price_tab[price_tab[, 1] == 1986, 2:15] / 100
# Deflator for "miscellaneous", derived from the other indices.
# NOTE(review): this evaluates as pri[1] - (sum(pri[2:14]) / 14) -- confirm
# that this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Repeat the 15 deflators on every row so they line up with columns 4-18.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1986def.csv")

##############################################################################################

##################################################  1985 ################################
rm(list=ls())

# Rows 1833, 2209 and 5859 are dropped from every 1985 table; the negative
# indices are kept in `esc` so the same exclusion can be reused when the
# remaining files are read.
esc <- c(-1833, -2209, -5859)

# Household list and total expenditure (not retro recall codes). Read from
# the same "datafes" folder as the other 1985 files, so that a single
# find-and-replace of the data location covers it.
D0 <- read.dta("C:\\datafes\\1985\\hmisc.dta")[, c("caseno", "xp378")]
D0 <- D0[esc, ]
caseno <- D0[, 1]
totc <- D0[, 2]

# Age of the head of the family.
# NOTE(review): the column is b264x here but b264 in the later years --
# confirm the name against the 1985 hces file.
Dage <- read.dta("C:\\datafes\\1985\\hces.dta")
Dage <- Dage[esc, ]
# The rows must be in the same household order as hmisc; a mismatch is only
# reported, the script carries on.
if (!all(D0$caseno == Dage$caseno)) {
  print("ERROR")
}
age <- Dage$b264x

# Number of household members (a049), with the same order check.
D1 <- read.dta("C:\\datafes\\1985\\hcntpers.dta")[, c("caseno", "a049")]
D1 <- D1[esc, ]
if (!all(D0$caseno == D1$caseno)) {
  print("ERROR")
}
nper <- D1$a049

# The six categories that are available as ready-made totals (xp367-xp372).
# `esc` holds negative row indices, so indexing with it drops those rows
# from every table.
D0 <- read.dta("C:\\datafes\\1985\\hmisc.dta")[esc, c("caseno", "xp378", "xp377")]
D1 <- read.dta("C:\\datafes\\1985\\hexpend1.dta")[esc, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1985\\hexpend2.dta")[esc, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1985\\hexpend3.dta")[esc, c("caseno", "xp370")]
D4 <- read.dta("C:\\datafes\\1985\\hexpend4.dta")[esc, c("caseno", "xp371")]
D5 <- read.dta("C:\\datafes\\1985\\hexpend5.dta")[esc, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1985\\hfuel.dta")[esc, c("caseno", "xp368")]

# Master list of households: caseno in column 1, total expenditure in column 2.
tot_exp <- cbind(D0$caseno, D0$xp378) # total expenditure (not retro recall codes)

# One numeric matrix per expenditure table; as.matrix() keeps the column names.
listM <- lapply(list(D1, D2, D3, D4, D5, D6), as.matrix)

le <- nrow(tot_exp)
# Diagnostic only: a table with more rows than hmisc is reported, not fatal.
if (max(vapply(listM, nrow, integer(1))) > le) {
  print("ACHTUNG")
}

# Bring every table into the household order of hmisc. match() performs the
# caseno lookup in one vectorised step (the first matching row is used);
# households missing from a table get their caseno followed by zeros.
nlist <- lapply(listM, function(tab) {
  hit <- match(tot_exp[, 1], tab[, 1])
  absent <- is.na(hit)
  out <- tab[hit, , drop = FALSE]
  rownames(out) <- NULL
  out[absent, 1] <- tot_exp[absent, 1]
  out[absent, -1] <- 0
  out
})

# Vectors used when the 1985 table is assembled; from here on tot_exp is the
# plain total-expenditure vector rather than the two-column matrix.
caseno <- D0$caseno
tot_exp <- D0$xp378
hous <- nlist[[1]][, "xp367"]
fuel <- nlist[[6]][, "xp368"]
food <- nlist[[2]][, "xp369"]
alc <- nlist[[3]][, "xp370"]
tob <- nlist[[4]][, "xp371"]
clot <- nlist[[5]][, "xp372"]
misc <- D0$xp377


# we find expenditure codes
E6<- read.dta("T:\\Hiwi_2\\Michael\\1985\\hexpend6.dta")
E7<- read.dta("T:\\Hiwi_2\\Michael\\1985\\hexpend7.dta")
Eservice<- read.dta("T:\\Hiwi_2\\Michael\\1985\\hservice.dta")
Evehicle<- read.dta("T:\\Hiwi_2\\Michael\\1985\\hvehicle.dta")
Emisc<- read.dta("T:\\Hiwi_2\\Michael\\1985\\hmisc.dta")
#if (any((E0$caseno == D0$caseno)==FALSE)){print("ERROR")} 

############################# household goods   
#xi102, xi103, xi104
#xi106
#xi108
#xi557, xi576
#xh577, xh578     <- xh? changed in xi
#xi573 
#xi119
#xi118



coln<-c("xi102","xi103","xi104","xi106","xi108","xi557","xi576","xi577","xi578","xi573","xi119","xi118")

ind1<-1:length(coln)
ind2<-1:length(coln)
c1<-0
c2<-0
for (i in 1:length(coln)){
if (any(colnames(E6)==coln[i])) {c1<-c1+1; ind1[c1]<-i}
else {c2<-c2+1; ind2[c2]<-i}
}
ind1<-ind1[1:c1]
ind2<-ind2[1:c2]


c1<-E6[,c("caseno",coln[ind1])]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,rep(0,ncol(c1)-1)))           
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
household_g_1<-CAT #change this
#
c1<-E7[,c("caseno",coln[ind2])]    ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
household_g_2<-CAT #change this
if (any((household_g_2[,1] == household_g_1[,1])==FALSE)){print("ERROR")} 
household_g<-cbind(household_g_2[,1], household_g_1[,2] + household_g_2[,2] )

############################# Household services   

#xi135, xi640, xi641, xi642, xi643
#xi128
#xi626, xi627, xi628, xi653
#xb180, xi663, xi665
#xi662


c1<-Eservice[,c("caseno","xi135","xi640","xi641","xi642","xi643","xi128","xi626",
"xi627","xi628","xi653","xb180","xi663","xi665","xi662")]
####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
household_s<-CAT #change this

############################# personal goods and services   
#xi570, xi571, xi572
#xi555, xi556
#xi567, xi568
#xi566, xi575
#xi133



coln<-c("xi570","xi571","xi572","xi555","xi556","xi567","xi568","xi566","xi575",
"xi133")
ind1<-1:length(coln)
ind2<-1:length(coln)
c1<-0
c2<-0
for (i in 1:length(coln)){
if (any(colnames(E7)==coln[i])) {c1<-c1+1; ind1[c1]<-i}
else {c2<-c2+1; ind2[c2]<-i}
}
ind1<-ind1[1:c1]
ind2<-ind2[1:c2]


c1<-E7[,c("caseno",coln[ind1])]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))           
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
personal_g_and_s_1<-CAT #change this
#
c1<-Eservice[,c("caseno",coln[ind2])]    ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
personal_g_and_s_2<-CAT #change this
if (any((personal_g_and_s_2[,1] == personal_g_and_s_1[,1])==FALSE)){print("ERROR")} 
personal_g_and_s<-cbind(personal_g_and_s_2[,1], personal_g_and_s_1[,2] + personal_g_and_s_2[,2] )

############################# motoring    
#xi592, xi597
#xi122, xi593, xi598
#xi897
#xb186, xb187, xb188, xb189, xb270

c1<-Evehicle[,c("caseno","xi592","xi597","xi122","xi593","xi598","xi897",
"xb186","xb187","xb188","xb189","xb270")]
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
motoring<-CAT #change this

#############################  fares and other travel   
#xi610
#xb191, xb219, xb255, xi124, xi125, xi126, xi624
#xi621, xi622, xi623

c1<-Evehicle[,c("caseno","xi610","xb191","xb219","xb255","xi124","xi125","xi126",
"xi624","xi621","xi622","xi623")]
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
fares_other_travel<-CAT #change this

############################# leisure goods    
#xi105
#xi554
#xi112, xi565
#xi563, xi564, xi574


coln<-c("xi105","xi554","xi112","xi565","xi563","xi564","xi574")
ind1<-1:length(coln)
ind2<-1:length(coln)
c1<-0
c2<-0
for (i in 1:length(coln)){
if (any(colnames(E6)==coln[i])) {c1<-c1+1; ind1[c1]<-i}
else {c2<-c2+1; ind2[c2]<-i}
}
ind1<-ind1[1:c1]
ind2<-ind2[1:c2]


c1<-E6[,c("caseno",coln[ind1])]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))           
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
leisure_g_1<-CAT #change this
#
c1<-E7[,c("caseno",coln[ind2])]    ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
#
if (any(is.na(mtc))) {
wo<-which(is.na(mtc))
CN<-CN[-wo,]
}
mtc<-match(CN[,1],CAT[,1])
#
CAT[mtc,2]<-CN[,2]
leisure_g_2<-CAT #change this
if (any((leisure_g_2[,1] == leisure_g_1[,1])==FALSE)){print("ERROR")} 
leisure_g<-cbind(leisure_g_2[,1], leisure_g_1[,2] + leisure_g_2[,2] )

############################# leisure service   
#xi629, xi630, xi631, xi632, xi633, xi634
#xi635, xi637
#xi654, xi655, xi656
#xi743
#xi747

# Leisure services: every code in `coln` is read from Eservice when Eservice
# has a column of that name and from Emisc otherwise; the two per-household
# totals are then added.
coln <- c(
  "xi629", "xi630", "xi631", "xi632", "xi633", "xi634", "xi635", "xi637",
  "xi654", "xi655", "xi656", "xi743", "xi747"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

in_service <- coln %in% colnames(Eservice)
leisure_s_1 <- sum_codes_by_case(Eservice, "caseno", coln[in_service], caseno)
leisure_s_2 <- sum_codes_by_case(Emisc, "caseno", coln[!in_service], caseno)
if (any((leisure_s_2[, 1] == leisure_s_1[, 1]) == FALSE)) {
  print("ERROR")
}
leisure_s <- cbind(leisure_s_2[, 1], leisure_s_1[, 2] + leisure_s_2[, 2])



# Assemble the 1985 household table: identifiers first, then total
# expenditure and the 14 expenditure categories (columns 4 to 18).
M <- cbind(
  caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero expenditure.
M[is.na(M)] <- 0

#### price deflation ####
# Columns 2 to 15 of the price file hold the indices for the year; they are
# rescaled by 100 before use.
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- P[P[, 1] == 1985, 2:15] / 100
# NOTE(review): the miscellaneous index is derived as the first index minus
# the sum of indices 2..14 divided by 14 -- confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One row of price indices per household, so the division below is elementwise.
PM <- t(matrix(pri, nrow = 15, ncol = nrow(M)))
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
write.csv(Md, ".../original_data/dat1985def.csv")

##############################################################################################

##################################################  1984 ################################
# Start the 1984 section from an empty workspace (each year is self-contained).
rm(list=ls())

# Raw files are read from the documented "datafes" folder, like the other
# reads of this section, so one find-and-replace relocates all of them.
# Row 5026 is dropped from every 1984 table.
# NOTE(review): presumably a faulty record -- confirm against the raw data.
D0 <- read.dta("C:\\datafes\\1984\\hmisc.dta")[, c("caseno", "xp378")] # total expenditure (not retro recall codes)
D0 <- D0[-5026, ]
caseno <- D0[, 1]
totc <- D0[, 2]

# Age of the head of the family.
Dage <- read.dta("C:\\datafes\\1984\\hces.dta")
Dage <- Dage[-5026, ]
# Rows must line up with D0 household by household.
if (any((D0$caseno == Dage$caseno) == FALSE)) {
  print("ERROR")
}
age <- Dage$b264x

# Number of household members.
D1 <- read.dta("C:\\datafes\\1984\\hcntpers.dta")[, c("caseno", "a049")]
D1 <- D1[-5026, ]
if (any((D0$caseno == D1$caseno) == FALSE)) {
  print("ERROR")
}
nper <- D1$a049



# Read the 1984 aggregate expenditure variables (one table per category) and
# align each table to the household order of hmisc.dta.
# Row 5026 is dropped from every table.
# NOTE(review): presumably a faulty record -- confirm against the raw data.
D0 <- read.dta("C:\\datafes\\1984\\hmisc.dta")[-5026, c("caseno", "xp378", "xp377")]
D1 <- read.dta("C:\\datafes\\1984\\hexpend1.dta")[-5026, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1984\\hexpend2.dta")[-5026, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1984\\hexpend3.dta")[-5026, c("caseno", "xp370")]
D4 <- read.dta("C:\\datafes\\1984\\hexpend4.dta")[-5026, c("caseno", "xp371")]
D5 <- read.dta("C:\\datafes\\1984\\hexpend5.dta")[-5026, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1984\\hfuel.dta")[-5026, c("caseno", "xp368")]

tot_exp <- cbind(D0$caseno, D0$xp378) # total expenditure (not retro recall codes)
listM <- lapply(list(D1, D2, D3, D4, D5, D6), as.matrix)

# Warn when a category table has more rows than the reference table.
le <- nrow(tot_exp)
lg <- vapply(listM, nrow, integer(1))
if (max(lg) > le) {
  print("ACHTUNG")
}

# Reorder the rows of `mat` (ids in column 1) to follow `case_ids`; ids that
# are absent from `mat` get a row of zeros. A single match() replaces the
# per-row scan over the whole table.
align_to_cases <- function(mat, case_ids) {
  out <- matrix(
    0,
    nrow = length(case_ids), ncol = ncol(mat),
    dimnames = list(NULL, colnames(mat))
  )
  out[, 1] <- case_ids
  hit <- match(case_ids, mat[, 1])
  present <- !is.na(hit)
  out[present, ] <- mat[hit[present], , drop = FALSE]
  out
}
nlist <- lapply(listM, align_to_cases, case_ids = tot_exp[, 1])

caseno <- D0$caseno
tot_exp <- D0$xp378
hous <- nlist[[1]][, "xp367"]
fuel <- nlist[[6]][, "xp368"]
food <- nlist[[2]][, "xp369"]
alc <- nlist[[3]][, "xp370"]
tob <- nlist[[4]][, "xp371"]
clot <- nlist[[5]][, "xp372"]
misc <- D0$xp377

M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)

#######
# we find expenditure codes
# Detailed 1984 expenditure-code tables, read from the documented "datafes"
# folder like the aggregate tables of this section.
E6 <- read.dta("C:\\datafes\\1984\\hexpend6.dta")
E7 <- read.dta("C:\\datafes\\1984\\hexpend7.dta")
Eservice <- read.dta("C:\\datafes\\1984\\hservice.dta")
Evehicle <- read.dta("C:\\datafes\\1984\\hvehicle.dta")
Emisc <- read.dta("C:\\datafes\\1984\\hmisc.dta")
#if (any((D0$caseno == E0$caseno)==FALSE)){print("ERROR")} 

############################# household goods   
#source table E6 and E7
#old code
#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788,X415,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733,X734
#X434,X436

#new code
#xh102, xh103, xh104
#xh106
#xh108
#xh576
#xh555
#xh577, xh578
#xh573 
#xh119
#xh118


# Household goods: every code in `coln` is read from E6 when E6 has a column
# of that name and from E7 otherwise; the two per-household totals are added.
coln <- c(
  "xh102", "xh103", "xh104", "xh106", "xh108", "xh576", "xh555", "xh577",
  "xh578", "xh573", "xh119", "xh118"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

in_e6 <- coln %in% colnames(E6)
household_g_1 <- sum_codes_by_case(E6, "caseno", coln[in_e6], caseno)
household_g_2 <- sum_codes_by_case(E7, "caseno", coln[!in_e6], caseno)
if (any((household_g_2[, 1] == household_g_1[, 1]) == FALSE)) {
  print("ERROR")
}
household_g <- cbind(household_g_2[, 1], household_g_1[, 2] + household_g_2[, 2])

############################# Household services   

# source table only hservice.dta 

#old code 
#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

#new code
#xh135, xh640, xh641, xh642, xh643
#xh128
#xh626, xh627, xh628, xh653
#xb180, xh663, xh665
#xh662

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

# Household services: all codes are columns of Eservice.
household_s <- sum_codes_by_case(
  Eservice, "caseno",
  c(
    "xh135", "xh640", "xh641", "xh642", "xh643", "xh128", "xh626", "xh627",
    "xh628", "xh653", "xb180", "xh663", "xh665", "xh662"
  ),
  caseno
)
#
#
############################# personal goods and services   
#source table E7 and Eservice

#old code
#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

#new code
#xh570, xh571, xh572
#xh553, xh554
#xh567, xh568
#xh566, xh575
#xh133


# Personal goods and services: every code in `coln` is read from E7 when E7
# has a column of that name and from Eservice otherwise; the two
# per-household totals are then added.
coln <- c(
  "xh570", "xh571", "xh572", "xh553", "xh554", "xh567", "xh568", "xh566",
  "xh575", "xh133"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

in_e7 <- coln %in% colnames(E7)
personal_g_and_s_1 <- sum_codes_by_case(E7, "caseno", coln[in_e7], caseno)
personal_g_and_s_2 <- sum_codes_by_case(Eservice, "caseno", coln[!in_e7], caseno)
if (any((personal_g_and_s_2[, 1] == personal_g_and_s_1[, 1]) == FALSE)) {
  print("ERROR")
}
personal_g_and_s <- cbind(
  personal_g_and_s_2[, 1],
  personal_g_and_s_1[, 2] + personal_g_and_s_2[, 2]
)

############################# motoring    
#source is table Evehicle

#old code
#X509,X510,X511
#X541,X542
#X546,X549,

#new code
#xh592, xh597
#xh122, xh593, xh598
#xh898
#xb186, xb187, xb188, xb189, xb270


# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

# Motoring: all codes are columns of Evehicle.
motoring <- sum_codes_by_case(
  Evehicle, "caseno",
  c(
    "xh592", "xh597", "xh122", "xh593", "xh598", "xh898",
    "xb186", "xb187", "xb188", "xb189", "xb270"
  ),
  caseno
)

#############################  fares and other travel   
# source is Evehicle

#old code
#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

#new code
#xh610
#xb191, xb219, xb255, xh124, xh125, xh126, xh623
#xh621, xh622, xh623


# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

# Fares and other travel: all codes are columns of Evehicle.
# "xh623" used to be listed twice; selecting a data-frame column twice
# duplicates it, so its amounts entered the total twice. It is listed once now.
# NOTE(review): the 1983 list has four distinct codes at this position
# (xg613, xg610, xg611, xg612); one "xh623" may have been meant as "xh624" --
# verify against the 1984 codebook.
fares_other_travel <- sum_codes_by_case(
  Evehicle, "caseno",
  c(
    "xh610", "xb191", "xb219", "xb255", "xh124", "xh125", "xh126",
    "xh623", "xh621", "xh622"
  ),
  caseno
)

############################# leisure goods    
#source is table E6 and E7

#old code
#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#xd631,xd632,xd633,xd634


#new code
#xh105
#xh552
#xh112, xh563
#xh562, xh561,

# Leisure goods: every code in `coln` is read from E6 when E6 has a column of
# that name and from E7 otherwise; the two per-household totals are then added.
coln <- c("xh105", "xh552", "xh112", "xh563", "xh562", "xh561")

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

in_e6 <- coln %in% colnames(E6)
leisure_g_1 <- sum_codes_by_case(E6, "caseno", coln[in_e6], caseno)
leisure_g_2 <- sum_codes_by_case(E7, "caseno", coln[!in_e6], caseno)
if (any((leisure_g_2[, 1] == leisure_g_1[, 1]) == FALSE)) {
  print("ERROR")
}
leisure_g <- cbind(leisure_g_2[, 1], leisure_g_1[, 2] + leisure_g_2[, 2])



############################# leisure service   
#source tables are Eservice and Emisc

#old code
#xd761,xd762,xd763,xd764,xd765,xd766,xd767,xd769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824

#new code
#xh629, xh630, xh631, xh632, xh633, xh634
#xh635, xh637
#xh654, xh655, xh656
#xh745
#xh750


# Leisure services: every code in `coln` is read from Eservice when Eservice
# has a column of that name and from Emisc otherwise; the two per-household
# totals are then added.
coln <- c(
  "xh629", "xh630", "xh631", "xh632", "xh633", "xh634", "xh635", "xh637",
  "xh654", "xh655", "xh656", "xh745", "xh750"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  known <- !is.na(pos) # households not listed in ids_out are skipped
  out[pos[known], 2] <- totals[known]
  out
}

in_service <- coln %in% colnames(Eservice)
leisure_s_1 <- sum_codes_by_case(Eservice, "caseno", coln[in_service], caseno)
leisure_s_2 <- sum_codes_by_case(Emisc, "caseno", coln[!in_service], caseno)
if (any((leisure_s_2[, 1] == leisure_s_1[, 1]) == FALSE)) {
  print("ERROR")
}
leisure_s <- cbind(leisure_s_2[, 1], leisure_s_1[, 2] + leisure_s_2[, 2])

########
##
# Assemble the 1984 household table: identifiers first, then total
# expenditure and the 14 expenditure categories (columns 4 to 18).
# An earlier intermediate `M` built here was overwritten by this assignment
# before being used, so it has been removed.
M <- cbind(
  caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero expenditure.
M[is.na(M)] <- 0

#### price deflation ####
# Columns 2 to 15 of the price file hold the indices for the year; they are
# rescaled by 100 before use.
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
pri <- P[P[, 1] == 1984, 2:15] / 100
# NOTE(review): the miscellaneous index is derived as the first index minus
# the sum of indices 2..14 divided by 14 -- confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One row of price indices per household, so the division below is elementwise.
PM <- t(matrix(nrow = 15, ncol = nrow(M), pri))
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1984def.csv")

##############################################################################################

##################################################  1983 ################################
# Start the 1983 section from an empty workspace (each year is self-contained).
rm(list=ls())

# Raw files are read from the documented "datafes" folder, like the other
# reads of this section, so one find-and-replace relocates all of them.
D0 <- read.dta("C:\\datafes\\1983\\hmisc.dta")[, c("caseno", "xp378")]
caseno <- D0[, 1]
totc <- D0[, 2]

# Age of the head of the family.
# NOTE(review): unlike hcntpers.dta below, the row order of hhead.dta is not
# checked against D0 -- confirm both files list households in the same order.
age <- read.dta("C:\\datafes\\1983\\hhead.dta")[, "p356"]

# Number of household members; rows must line up with D0.
D1 <- read.dta("C:\\datafes\\1983\\hcntpers.dta")[, c("caseno", "a049")]
if (any((D0$caseno == D1$caseno) == FALSE)) {
  print("ERROR")
}
nper <- D1$a049


# Read the 1983 aggregate expenditure variables (one table per category) and
# align each table to the household order of hmisc.dta.
D0 <- read.dta("C:\\datafes\\1983\\hmisc.dta")[, c("caseno", "xp378", "xp377")]
D1 <- read.dta("C:\\datafes\\1983\\hexpend1.dta")[, c("caseno", "xp367")]
D2 <- read.dta("C:\\datafes\\1983\\hexpend2.dta")[, c("caseno", "xp369")]
D3 <- read.dta("C:\\datafes\\1983\\hexpend3.dta")[, c("caseno", "xp370")]
D4 <- read.dta("C:\\datafes\\1983\\hexpend4.dta")[, c("caseno", "xp371")]
D5 <- read.dta("C:\\datafes\\1983\\hexpend5.dta")[, c("caseno", "xp372")]
D6 <- read.dta("C:\\datafes\\1983\\hfuel.dta")[, c("caseno", "xp368")]

tot_exp <- cbind(D0$caseno, D0$xp378) # total expenditure (not retro recall codes)
listM <- lapply(list(D1, D2, D3, D4, D5, D6), as.matrix)

# Warn when a category table has more rows than the reference table.
le <- nrow(tot_exp)
lg <- vapply(listM, nrow, integer(1))
if (max(lg) > le) {
  print("ACHTUNG")
}

# Reorder the rows of `mat` (ids in column 1) to follow `case_ids`; ids that
# are absent from `mat` get a row of zeros. A single match() replaces the
# per-row scan over the whole table.
align_to_cases <- function(mat, case_ids) {
  out <- matrix(
    0,
    nrow = length(case_ids), ncol = ncol(mat),
    dimnames = list(NULL, colnames(mat))
  )
  out[, 1] <- case_ids
  hit <- match(case_ids, mat[, 1])
  present <- !is.na(hit)
  out[present, ] <- mat[hit[present], , drop = FALSE]
  out
}
nlist <- lapply(listM, align_to_cases, case_ids = tot_exp[, 1])

caseno <- D0$caseno
tot_exp <- D0$xp378
hous <- nlist[[1]][, "xp367"]
fuel <- nlist[[6]][, "xp368"]
food <- nlist[[2]][, "xp369"]
alc <- nlist[[3]][, "xp370"]
tob <- nlist[[4]][, "xp371"]
clot <- nlist[[5]][, "xp372"]
misc <- D0$xp377

M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)

# Detailed 1983 expenditure-code tables, read from the documented "datafes"
# folder like the aggregate tables of this section.
E6 <- read.dta("C:\\datafes\\1983\\hexpend6.dta")
E7 <- read.dta("C:\\datafes\\1983\\hexpend7.dta")
Eservice <- read.dta("C:\\datafes\\1983\\hservice.dta")
Evehicle <- read.dta("C:\\datafes\\1983\\hvehicle.dta")
Emisc <- read.dta("C:\\datafes\\1983\\hmisc.dta")

#if (any((D0$caseno == E0$caseno)==FALSE)){print("ERROR")}
 
############################# household goods      
#source table E6 and E7
#old code
#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788,X415,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733,X734
#X434,X436

#new code
#xg102, xg103, xg104
#xg106
#xg108, xg565
#xg544
#xg566, xg567
#xg562 
#xg119
#xg118

# Household goods: every code in `coln` is read from E6 when E6 has a column
# of that name and from E7 otherwise; the two per-household totals are added.
coln <- c(
  "xg102", "xg103", "xg104", "xg106", "xg108", "xg565", "xg544", "xg566",
  "xg567", "xg562", "xg119", "xg118"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

in_e6 <- coln %in% colnames(E6)
household_g_1 <- sum_codes_by_case(E6, "caseno", coln[in_e6], caseno)
household_g_2 <- sum_codes_by_case(E7, "caseno", coln[!in_e6], caseno)
if (any((household_g_2[, 1] == household_g_1[, 1]) == FALSE)) {
  print("ERROR")
}
household_g <- cbind(household_g_2[, 1], household_g_1[, 2] + household_g_2[, 2])

############################# Household services   
# source table only hservice.dta 

#old code 
#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

#new code
#xg135, xg629, xg630, xg631, xg632
#xg128
#xg615, xg616, xg617, xg642
#xb180, xg652, xg654
#xg651

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

# Household services: all codes are columns of Eservice.
household_s <- sum_codes_by_case(
  Eservice, "caseno",
  c(
    "xg135", "xg629", "xg630", "xg631", "xg632", "xg128", "xg615", "xg616",
    "xg617", "xg642", "xb180", "xg652", "xg654", "xg651"
  ),
  caseno
)
#



############################# personal goods and services
#source table E7 and Eservice

#old code
#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

#new code
#xg559, xg560, xg561,
#xg542, xg543
#xg556, xg557
#xg555, xg564 
#xg133

# Personal goods and services: every code in `coln` is read from E7 when E7
# has a column of that name and from Eservice otherwise; the two
# per-household totals are then added.
coln <- c(
  "xg559", "xg560", "xg561", "xg542", "xg543", "xg556", "xg557", "xg555",
  "xg564", "xg133"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

in_e7 <- coln %in% colnames(E7)
personal_g_and_s_1 <- sum_codes_by_case(E7, "caseno", coln[in_e7], caseno)
personal_g_and_s_2 <- sum_codes_by_case(Eservice, "caseno", coln[!in_e7], caseno)
if (any((personal_g_and_s_2[, 1] == personal_g_and_s_1[, 1]) == FALSE)) {
  print("ERROR")
}
personal_g_and_s <- cbind(
  personal_g_and_s_2[, 1],
  personal_g_and_s_1[, 2] + personal_g_and_s_2[, 2]
)

############################# motoring
#source is table Evehicle

#old code
#X509,X510,X511
#X541,X542
#X546,X549,

#new code
#xg579,xg586
#xg122,xg582,xg587
#xg892
#xb186, xb187, xb188, xb189, xb270

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

# Motoring: all codes are columns of Evehicle.
motoring <- sum_codes_by_case(
  Evehicle, "caseno",
  c(
    "xg579", "xg586", "xg582", "xg587", "xg122", "xg892",
    "xb186", "xb187", "xb188", "xb189", "xb270"
  ),
  caseno
)


#############################  fares and other travel
# source is Evehicle

#old code
#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

#new code
#xg599
#xb191, xb219, xb255, xg124, xg125, xg126, xg613
#xg610, xg611, xg612

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

# Fares and other travel: all codes are columns of Evehicle.
fares_other_travel <- sum_codes_by_case(
  Evehicle, "caseno",
  c(
    "xg599", "xb191", "xb219", "xb255", "xg124", "xg125", "xg126", "xg613",
    "xg610", "xg611", "xg612"
  ),
  caseno
)

############################# leisure goods
#source is table E6 and E7

#old code
#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#xd631,xd632,xd633,xd634


#new code
#xg105
#xg541
#xg112, xg552
#xg551, xg550,

# Leisure goods: every code in `coln` is read from E6 when E6 has a column of
# that name and from E7 otherwise; the two per-household totals are then added.
coln <- c("xg105", "xg541", "xg112", "xg552", "xg551", "xg550")

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

in_e6 <- coln %in% colnames(E6)
leisure_g_1 <- sum_codes_by_case(E6, "caseno", coln[in_e6], caseno)
leisure_g_2 <- sum_codes_by_case(E7, "caseno", coln[!in_e6], caseno)
if (any((leisure_g_2[, 1] == leisure_g_1[, 1]) == FALSE)) {
  print("ERROR")
}
leisure_g <- cbind(leisure_g_2[, 1], leisure_g_1[, 2] + leisure_g_2[, 2])

############################# leisure service
#source tables are Eservice and Emisc

#old code
#xd761,xd762,xd763,xd764,xd765,xd766,xd767,xd769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824

#new code
#xg618, xg619, xg620, xg621, xg622, xg623
#xg624, xg626
#xg643, xg644, xg645
#xg734
#xg739

# Leisure services: every code in `coln` is read from Eservice when Eservice
# has a column of that name and from Emisc otherwise; the two per-household
# totals are then added.
coln <- c(
  "xg618", "xg619", "xg620", "xg621", "xg622", "xg623", "xg624",
  "xg626", "xg643", "xg644", "xg645", "xg734", "xg739"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

in_service <- coln %in% colnames(Eservice)
leisure_s_1 <- sum_codes_by_case(Eservice, "caseno", coln[in_service], caseno)
leisure_s_2 <- sum_codes_by_case(Emisc, "caseno", coln[!in_service], caseno)
if (any((leisure_s_2[, 1] == leisure_s_1[, 1]) == FALSE)) {
  print("ERROR")
}
leisure_s <- cbind(leisure_s_2[, 1], leisure_s_1[, 2] + leisure_s_2[, 2])



########
##
# Assemble the 1983 household table: identifiers first, then total
# expenditure and the 14 expenditure categories (columns 4 to 18).
# An earlier intermediate `M` built here was overwritten by this assignment
# before being used, so it has been removed.
M <- cbind(
  caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc
)
colnames(M) <- c(
  "caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous"
)
# Missing entries are treated as zero expenditure.
M[is.na(M)] <- 0

#### price deflation ####
# Columns 2 to 15 of the price file hold the indices for the year; they are
# rescaled by 100 before use.
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
pri <- P[P[, 1] == 1983, 2:15] / 100
# NOTE(review): the miscellaneous index is derived as the first index minus
# the sum of indices 2..14 divided by 14 -- confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One row of price indices per household, so the division below is elementwise.
PM <- t(matrix(nrow = 15, ncol = nrow(M), pri))
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1983def.csv")

##############################################################################################

##################################################  1982 ################################
# we create total consumption
# Start the 1982 section from an empty workspace (each year is self-contained).
rm(list=ls())
library(foreign)
# Raw files are read from the documented "datafes" folder, like the other
# hhprod5.dta read of this section, so one find-and-replace relocates them.
D0 <- read.dta("C:\\datafes\\1982\\hhprod5.dta")[, c("hhno", "xp378", "p396")]
caseno <- D0[, 1]
totc <- D0[, 2]
# Age of the head of the family: the stored p396 value divided by 100.
age <- D0[, 3] / 100
# Number of household members; rows must line up with D0.
D1 <- read.dta("C:\\datafes\\1982\\hhchars.dta")[, c("hhno", "a049")]
if (any((D0$hhno == D1$hhno) == FALSE)) {
  print("ERROR")
}
nper <- D1$a049
#######

# 1982 keeps total expenditure and the directly available category totals in
# a single household-level file, so no alignment across tables is needed.
D0 <- read.dta("C:\\datafes\\1982\\hhprod5.dta")[
  ,
  c("hhno", "xp378", "xp377", "xp367", "xp369", "xp370", "xp371", "xp372", "xp368")
]

caseno <- D0[["hhno"]]
tot_exp <- D0[["xp378"]]
hous <- D0[["xp367"]]
fuel <- D0[["xp368"]]
food <- D0[["xp369"]]
alc <- D0[["xp370"]]
tob <- D0[["xp371"]]
clot <- D0[["xp372"]]
misc <- D0[["xp377"]]

# Column names of M are taken from the variable names passed to cbind().
M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)

# we find expenditure codes
# Detailed 1982 expenditure-code tables, read from the documented "datafes"
# folder like hhprod5.dta above.
E1 <- read.dta("C:\\datafes\\1982\\persexp1.dta")
E2 <- read.dta("C:\\datafes\\1982\\persexp2.dta")
E3 <- read.dta("C:\\datafes\\1982\\persexp3.dta")
E4 <- read.dta("C:\\datafes\\1982\\persexp4.dta")
E5 <- read.dta("C:\\datafes\\1982\\persexp5.dta")
E6 <- read.dta("C:\\datafes\\1982\\persexp6.dta")


############################# HOUSEHOLD GOODS
#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733,X734
#X434,X436

# Household goods: every code in `coln` is read from E4 when E4 has a column
# of that name and from E5 otherwise; the two per-household totals are added.
coln <- c(
  "xd401", "xd404", "xd405", "xd407", "xd411", "xd412", "xd413", "xd417",
  "xd419", "xd423", "xd786", "xd788", "xd431", "xd644", "xd741", "xd746",
  "xd749", "xd623", "xd731", "xd732", "xd733", "xd734", "xd434", "xd436"
)

# Sum the expenditure columns `codes` of `tab` per household and align the
# totals to `ids_out`. Rows of one household must be contiguous in `tab`;
# NA entries count as 0. Returns a 2-column matrix (id, total) with NA totals
# for households that have no rows in `tab`.
sum_codes_by_case <- function(tab, id_col, codes, ids_out) {
  block <- tab[, c(id_col, codes), drop = FALSE]
  block[is.na(block)] <- 0
  ids <- block[[1]]
  # Last row of each run of equal ids; also valid for a single run, where a
  # `2:length(...)` loop would index out of range.
  run_end <- which(diff(c(ids, ids[length(ids)] + 1)) != 0)
  run_start <- c(1, run_end[-length(run_end)] + 1)
  totals <- vapply(
    seq_along(run_end),
    function(k) sum(block[run_start[k]:run_end[k], -1, drop = FALSE]),
    numeric(1)
  )
  out <- matrix(nrow = length(ids_out), ncol = 2)
  out[, 1] <- ids_out
  pos <- match(ids[run_end], ids_out)
  # Households not listed in ids_out are skipped; an NA position would make
  # the assignment below fail.
  known <- !is.na(pos)
  out[pos[known], 2] <- totals[known]
  out
}

in_e4 <- coln %in% colnames(E4)
household_g_1 <- sum_codes_by_case(E4, "hhno", coln[in_e4], caseno)
household_g_2 <- sum_codes_by_case(E5, "hhno", coln[!in_e4], caseno)
if (any((household_g_2[, 1] == household_g_1[, 1]) == FALSE)) {
  print("ERROR")
}
household_g <- cbind(household_g_2[, 1], household_g_1[, 2] + household_g_2[, 2])



############################# Household services 
# source table only persexp5.dta (E5)
 
#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

c1<-E5[,c("hhno","xd771","xd790","xd791","xd781","xd782","xd788",
"xd751","xd752",
"xd795","xd796","xd797",
"xd770",
"xd799")]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
household_s<-CAT #change this
#


############################# personal goods and services
# source table only persexp5.dta (E1)

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

c1<-E5[,c("hhno","xd622","xd624","xd745","xd642","xd643","xd621","xd625",
"xd773","xd775","xd611","xd612","xd793")]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
personal_g_and_s<-CAT #change this
#

############################# motoring     PROBLEM

#X501,X502,X503,X504   SOURCE UNCLEAR, not part of table E1-E6

# source of the rest ist table E4
#X509,X510,X511
#X541,X542
#X546,X549,


c1<-E4[,c("hhno","xd509","xd510","xd511",
"xd541","xd542","xd546","xd549")]
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
motoring<-CAT #change this


############################# fares and other travel
# source only table E4
#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559


c1<-E4[,c("hhno","xd505","xd508","xd551","xd552","xd553","xd554","xd555","xd556","xd557","xd559")]
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
fares_other_travel<-CAT #change this


############################# leisure goods
#source E4 and E5

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X634

coln<-c("xd409","xd422","xd601","xd603","xd784","xd641","xd721","xd722",
"xd723","xd655","xd631","xd632","xd634")

ind1<-1:length(coln)
ind2<-1:length(coln)
c1<-0
c2<-0
for (i in 1:length(coln)){
if (any(colnames(E4)==coln[i])) {c1<-c1+1; ind1[c1]<-i}
else {c2<-c2+1; ind2[c2]<-i}
}
ind1<-ind1[1:c1]
ind2<-ind2[1:c2]


c1<-E4[,c("hhno",coln[ind1])]     ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
leisure_g_1<-CAT #change this
#
c1<-E5[,c("hhno",coln[ind2])]    ####
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
leisure_g_2<-CAT #change this
if (any((leisure_g_2[,1] == leisure_g_1[,1])==FALSE)){print("ERROR")} 
leisure_g<-cbind(leisure_g_2[,1], leisure_g_1[,2] + leisure_g_2[,2] )


############################# leisure services
# source is only table E5

#X761,X762,X763,X764,X766,X767,X769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824
 
c1<-E5[,c("hhno","xd761","xd762","xd763","xd764","xd766","xd767","xd769",
"xd768","xd756","xd757","xd759","xd811","xd812","xd813", "xd814","xd821",
"xd822","xd823","xd824")]
nc<-ncol(c1)
c1[is.na(c1)]<-0
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind,1]
CN[1,2]<- sum(c1[1:ind[1],2:nc])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],2:nc])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
leisure_s<-CAT #change this

#######
#
# Assemble the 1982 household matrix: identifiers (caseno, age, nper) followed
# by total expenditure and the 14 category columns.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous")
# Households without an entry in a category are NA up to here; count them as 0.
M[is.na(M)] <- 0
#### price deflation#################
# Columns 2:15 of the price file hold the indices for M[, 4:17], in percent.
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- P[P[, 1] == 1982, 2:15] / 100
# NOTE(review): the miscellaneous deflator is the first index minus the sum of
# the 13 other indices divided by 14 - confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One identical row of deflators per household, so column j is divided by pri[j].
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1982def.csv")

##############################################################################################

##################################################  1981 ################################
# Start the year from an empty workspace (the previous year's objects are
# reused by name below and must not leak in).
rm(list=ls())
# All 1981 files are read from the documented raw-data folder
# "C:\\datafes\\1981", so one find-and-replace relocates every path.

# Household file: total expenditure (p378), the category totals that are
# taken over directly (p367-p372, p377) and p356 (used for age).
hpc <- read.dta("C:\\datafes\\1981\\hpc.dta")
D0 <- hpc[, c("caseno", "p378", "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
# we create total consumption
totc <- D0$p378
# we find the age of the head of the family
age <- hpc[, "p356"] / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1981\\hcr.dta")[, c("caseno", "a049")]
# nper is taken row by row from hcr, so both files must list the same
# households in the same order.
if (!isTRUE(all(caseno == D1$caseno))) {
  print("ERROR")
}
nper <- D1$a049

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1981\\per.dta")
if (any(is.na(E0))) {
  print("ERROR! NA present")
}
# Characters 2-5 of column 5 are the numeric part of the item code
# (presumably the text looks like "X401" - TODO confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (household id, matched against caseno later),
# column 2 = numeric item code, column 3 = E0 column 6 (the value summed later).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### 1981: category totals per household
# `dati` holds household id (column 1), numeric item code (column 2) and the
# value to be summed (column 3).  Every category is the per-household sum of
# column 3 over a fixed list of item codes.

# Per-household sum of records[, 3] over the rows whose code is in `codes`.
#   records    : numeric matrix with columns id, code, value
#   households : vector of household ids that defines the output rows
#   codes      : numeric item codes belonging to the category
# Returns a two-column matrix: household id, total (NA when the household has
# no row with one of these codes).
sum_codes_by_household <- function(records, households, codes) {
  keep <- records[, 2] %in% codes
  hh <- records[keep, 1]
  ids <- unique(hh)
  # Grouping by position in `ids` keeps rowsum()'s output in the order of `ids`.
  totals <- as.vector(rowsum(records[keep, 3], match(hh, ids)))
  matrix(c(households, totals[match(households, ids)]), ncol = 2)
}

######################### HOUSEHOLD GOODS

#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733,X734
#X434,X436
# (the code list above says 419; the filter previously tested 719 by mistake)

household_g <- sum_codes_by_household(dati, caseno,
  c(401, 404, 405, 407,
    411, 412, 413, 417, 419, 423, 786, 788,
    431, 644, 741,
    746, 749,
    623,
    731, 732,
    733, 734,
    434, 436))

######################### HOUSEHOLD SERVICES

#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

household_s <- sum_codes_by_household(dati, caseno,
  c(771, 790, 791, 781, 782, 788,
    751, 752,
    795, 796, 797,
    770,
    799))

######################### personal goods and services

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

personal_g_and_s <- sum_codes_by_household(dati, caseno,
  c(622, 624, 745,
    642, 643,
    621, 625, 773, 775,
    611, 612,
    793))

######################### motoring

#X501,X502,X503,X504
#X509,X510,X511
#X541,X542
#X546,X549,

motoring <- sum_codes_by_household(dati, caseno,
  c(501, 502, 503, 504,
    509, 510, 511,
    541, 542,
    546, 549))

######################### fares and other travel

#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

fares_other_travel <- sum_codes_by_household(dati, caseno,
  c(505, 508,
    551, 552, 553, 554,
    555, 556, 557, 559))

######################### leisure goods

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X634

leisure_g <- sum_codes_by_household(dati, caseno,
  c(409, 422, 601, 603, 784,
    641,
    721, 722, 723, 655,
    631, 632, 634))

######################### leisure services

#X761,X762,X763,X764,X766,X767,X769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824

leisure_s <- sum_codes_by_household(dati, caseno,
  c(761, 762, 763, 764, 766, 767, 769,
    768,
    756, 757, 759,
    811, 812, 813, 814,
    821, 822, 823, 824))


# Assemble the 1981 household matrix: identifiers (caseno, age, nper) followed
# by total expenditure and the 14 category columns.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous")
# Households without an entry in a category are NA up to here; count them as 0.
M[is.na(M)] <- 0
#### price deflation#################
# Columns 2:15 of the price file hold the indices for M[, 4:17], in percent.
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- P[P[, 1] == 1981, 2:15] / 100
# NOTE(review): the miscellaneous deflator is the first index minus the sum of
# the 13 other indices divided by 14 - confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One identical row of deflators per household, so column j is divided by pri[j].
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1981def.csv")

##############################################################################################

##################################################  1980 ################################
# Start the year from an empty workspace (the previous year's objects are
# reused by name below and must not leak in).
rm(list=ls())
# All 1980 files are read from the documented raw-data folder
# "C:\\datafes\\1980", so one find-and-replace relocates every path.

# Household file: total expenditure (p378), the category totals that are
# taken over directly (p367-p372, p377) and p356 (used for age).
hpc <- read.dta("C:\\datafes\\1980\\hpc.dta")
D0 <- hpc[, c("caseno", "p378", "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- hpc[, "p356"] / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1980\\hcr.dta")[, c("caseno", "a049")]
# nper is taken row by row from hcr, so both files must list the same
# households in the same order.
if (!isTRUE(all(caseno == D1$caseno))) {
  print("ERROR")
}
nper <- D1$a049

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1980\\per.dta")
# Characters 2-5 of column 5 are the numeric part of the item code
# (presumably the text looks like "X401" - TODO confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (household id, matched against caseno later),
# column 2 = numeric item code, column 3 = E0 column 6 (the value summed later).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### 1980: category totals per household
# `dati` holds household id (column 1), numeric item code (column 2) and the
# value to be summed (column 3).  Every category is the per-household sum of
# column 3 over a fixed list of item codes.

# Per-household sum of records[, 3] over the rows whose code is in `codes`.
#   records    : numeric matrix with columns id, code, value
#   households : vector of household ids that defines the output rows
#   codes      : numeric item codes belonging to the category
# Returns a two-column matrix: household id, total (NA when the household has
# no row with one of these codes).
sum_codes_by_household <- function(records, households, codes) {
  keep <- records[, 2] %in% codes
  hh <- records[keep, 1]
  ids <- unique(hh)
  # Grouping by position in `ids` keeps rowsum()'s output in the order of `ids`.
  totals <- as.vector(rowsum(records[keep, 3], match(hh, ids)))
  matrix(c(households, totals[match(households, ids)]), ncol = 2)
}

######################### HOUSEHOLD GOODS

#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733
#X434,X436
# (the code list above says 419; the filter previously tested 719 by mistake)

household_g <- sum_codes_by_household(dati, caseno,
  c(401, 404, 405, 407,
    411, 412, 413, 417, 419, 423, 786, 788,
    431, 644, 741,
    746, 749,
    623,
    731, 732,
    733,
    434, 436))

######################### HOUSEHOLD SERVICES

#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

household_s <- sum_codes_by_household(dati, caseno,
  c(771, 790, 791, 781, 782, 788,
    751, 752,
    795, 796, 797,
    770,
    799))

######################### personal goods and services

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

personal_g_and_s <- sum_codes_by_household(dati, caseno,
  c(622, 624, 745,
    642, 643,
    621, 625, 773, 775,
    611, 612,
    793))

######################### motoring

#X501,X502,X503,X504
#X509,X510,X511
#X547
#X546,X549,

motoring <- sum_codes_by_household(dati, caseno,
  c(501, 502, 503, 504,
    509, 510, 511,
    547,
    546, 549))

######################### fares and other travel

#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

fares_other_travel <- sum_codes_by_household(dati, caseno,
  c(505, 508,
    551, 552, 553, 554,
    555, 556, 557, 559))

######################### leisure goods

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X634

leisure_g <- sum_codes_by_household(dati, caseno,
  c(409, 422, 601, 603, 784,
    641,
    721, 722, 723, 655,
    631, 632, 634))

######################### leisure services

#X761,X762,X763,X764,X766,X767,X769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824

leisure_s <- sum_codes_by_household(dati, caseno,
  c(761, 762, 763, 764, 766, 767, 769,
    768,
    756, 757, 759,
    811, 812, 813, 814,
    821, 822, 823, 824))


# Assemble the 1980 household matrix: identifiers (caseno, age, nper) followed
# by total expenditure and the 14 category columns.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous")
# Households without an entry in a category are NA up to here; count them as 0.
M[is.na(M)] <- 0
#### price deflation#################
# Columns 2:15 of the price file hold the indices for M[, 4:17], in percent.
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- P[P[, 1] == 1980, 2:15] / 100
# NOTE(review): the miscellaneous deflator is the first index minus the sum of
# the 13 other indices divided by 14 - confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One identical row of deflators per household, so column j is divided by pri[j].
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1980def.csv")

##############################################################################################

##################################################  1979 ################################
# Start the year from an empty workspace (the previous year's objects are
# reused by name below and must not leak in).
rm(list=ls())
# All 1979 files are read from the documented raw-data folder
# "C:\\datafes\\1979", so one find-and-replace relocates every path.

# Household file: total expenditure (p378), the category totals that are
# taken over directly (p367-p372, p377) and p356 (used for age).
hpc <- read.dta("C:\\datafes\\1979\\hpc.dta")
D0 <- hpc[, c("caseno", "p378", "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- hpc[, "p356"] / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1979\\hcr.dta")[, c("caseno", "a049")]
# nper is taken row by row from hcr, so both files must list the same
# households in the same order.
if (!isTRUE(all(caseno == D1$caseno))) {
  print("ERROR")
}
nper <- D1$a049

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1979\\per.dta")
# Characters 2-5 of column 5 are the numeric part of the item code
# (presumably the text looks like "X401" - TODO confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (household id, matched against caseno later),
# column 2 = numeric item code, column 3 = E0 column 6 (the value summed later).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### 1979: category totals per household
# `dati` holds household id (column 1), numeric item code (column 2) and the
# value to be summed (column 3).  Every category is the per-household sum of
# column 3 over a fixed list of item codes.

# Per-household sum of records[, 3] over the rows whose code is in `codes`.
#   records    : numeric matrix with columns id, code, value
#   households : vector of household ids that defines the output rows
#   codes      : numeric item codes belonging to the category
# Returns a two-column matrix: household id, total (NA when the household has
# no row with one of these codes).
sum_codes_by_household <- function(records, households, codes) {
  keep <- records[, 2] %in% codes
  hh <- records[keep, 1]
  ids <- unique(hh)
  # Grouping by position in `ids` keeps rowsum()'s output in the order of `ids`.
  totals <- as.vector(rowsum(records[keep, 3], match(hh, ids)))
  matrix(c(households, totals[match(households, ids)]), ncol = 2)
}

######################### HOUSEHOLD GOODS

#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733
#X434,X436
# (the code list above says 419; the filter previously tested 719 by mistake)

household_g <- sum_codes_by_household(dati, caseno,
  c(401, 404, 405, 407,
    411, 412, 413, 417, 419, 423, 786, 788,
    431, 644, 741,
    746, 749,
    623,
    731, 732,
    733,
    434, 436))

######################### HOUSEHOLD SERVICES

#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

household_s <- sum_codes_by_household(dati, caseno,
  c(771, 790, 791, 781, 782, 788,
    751, 752,
    795, 796, 797,
    770,
    799))

######################### personal goods and services

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

personal_g_and_s <- sum_codes_by_household(dati, caseno,
  c(622, 624, 745,
    642, 643,
    621, 625, 773, 775,
    611, 612,
    793))

######################### motoring

#X501,X502,X503,X504
#X509,X510,X511
#X547
#X546,X549,

motoring <- sum_codes_by_household(dati, caseno,
  c(501, 502, 503, 504,
    509, 510, 511,
    547,
    546, 549))

######################### fares and other travel

#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

fares_other_travel <- sum_codes_by_household(dati, caseno,
  c(505, 508,
    551, 552, 553, 554,
    555, 556, 557, 559))

######################### leisure goods

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X634

leisure_g <- sum_codes_by_household(dati, caseno,
  c(409, 422, 601, 603, 784,
    641,
    721, 722, 723, 655,
    631, 632, 634))

######################### leisure services

#X761,X762,X763,X764,X766,X767,X769
#X768
#X756,X757,X759
#X811,X812, X813, X814
#X821,X822, X823, X824

leisure_s <- sum_codes_by_household(dati, caseno,
  c(761, 762, 763, 764, 766, 767, 769,
    768,
    756, 757, 759,
    811, 812, 813, 814,
    821, 822, 823, 824))


# Assemble the 1979 household matrix: identifiers (caseno, age, nper) followed
# by total expenditure and the 14 category columns.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
  "fuel_light_power", "food", "alcoholic_drink", "tobacco",
  "clothing_and_footwear", "household_goods", "household_services",
  "personal_goods_and_services", "motoring", "fares_and_other_travel",
  "leisure_goods", "leisure_services", "miscellaneous")
# Households without an entry in a category are NA up to here; count them as 0.
M[is.na(M)] <- 0
#### price deflation#################
# Columns 2:15 of the price file hold the indices for M[, 4:17], in percent.
P <- as.matrix(read.csv(".../price_indices.csv", dec = ".", sep = ";"))
pri <- P[P[, 1] == 1979, 2:15] / 100
# NOTE(review): the miscellaneous deflator is the first index minus the sum of
# the 13 other indices divided by 14 - confirm this is the intended formula.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One identical row of deflators per household, so column j is divided by pri[j].
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md, ".../original_data/dat1979def.csv")

##############################################################################################

##################################################  1978 ################################
# Start the year from an empty workspace (the previous year's objects are
# reused by name below and must not leak in).
rm(list=ls())
# All 1978 files are read from the documented raw-data folder
# "C:\\datafes\\1978", so one find-and-replace relocates every path.

######
######
# Household file: total expenditure (p378), the category totals that are
# taken over directly (p367-p372, p377) and p356 (used for age).
hpc <- read.dta("C:\\datafes\\1978\\hpc.dta")
D0 <- hpc[, c("caseno", "p378", "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- hpc[, "p356"] / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1978\\hcr.dta")[, c("caseno", "a049")]
# nper is taken row by row from hcr, so both files must list the same
# households in the same order.
if (!isTRUE(all(caseno == D1$caseno))) {
  print("ERROR")
}
nper <- D1$a049

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1978\\per.dta")
# Characters 2-5 of column 5 are the numeric part of the item code
# (presumably the text looks like "X401" - TODO confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (household id, matched against caseno later),
# column 2 = numeric item code, column 3 = E0 column 6 (the value summed later).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### 1978: category totals per household
# `dati` holds household id (column 1), numeric item code (column 2) and the
# value to be summed (column 3).  Every category is the per-household sum of
# column 3 over a fixed list of item codes.

# Per-household sum of records[, 3] over the rows whose code is in `codes`.
#   records    : numeric matrix with columns id, code, value
#   households : vector of household ids that defines the output rows
#   codes      : numeric item codes belonging to the category
# Returns a two-column matrix: household id, total (NA when the household has
# no row with one of these codes).
sum_codes_by_household <- function(records, households, codes) {
  keep <- records[, 2] %in% codes
  hh <- records[keep, 1]
  ids <- unique(hh)
  # Grouping by position in `ids` keeps rowsum()'s output in the order of `ids`.
  totals <- as.vector(rowsum(records[keep, 3], match(hh, ids)))
  matrix(c(households, totals[match(households, ids)]), ncol = 2)
}

######################### HOUSEHOLD GOODS

#X401,X404,X405,X407
#X411,X412,X413,X417,X419,X423,X786,X788
#X431,X644,X741
#X746,X749
#X623
#X731,X732
#X733
#X434,X436
# (the code list above says 419; the filter previously tested 719 by mistake)

household_g <- sum_codes_by_household(dati, caseno,
  c(401, 404, 405, 407,
    411, 412, 413, 417, 419, 423, 786, 788,
    431, 644, 741,
    746, 749,
    623,
    731, 732,
    733,
    434, 436))

######################### HOUSEHOLD SERVICES

#X771,X790,X791,X781,X782,X788
#X751,X752
#X795,X796,X797
#X770
#X799

household_s <- sum_codes_by_household(dati, caseno,
  c(771, 790, 791, 781, 782, 788,
    751, 752,
    795, 796, 797,
    770,
    799))

######################### personal goods and services

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

personal_g_and_s <- sum_codes_by_household(dati, caseno,
  c(622, 624, 745,
    642, 643,
    621, 625, 773, 775,
    611, 612,
    793))

######################### motoring

#X501,X502,X503,X504
#X509,X510,X511
#X547
#X546,X549,

motoring <- sum_codes_by_household(dati, caseno,
  c(501, 502, 503, 504,
    509, 510, 511,
    547,
    546, 549))

######################### fares and other travel

#(X505,X508)
#X551,X552,X553,X554
#X555,X556,X557,X559

fares_other_travel <- sum_codes_by_household(dati, caseno,
  c(505, 508,
    551, 552, 553, 554,
    555, 556, 557, 559))

######################### leisure goods

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X634

leisure_g <- sum_codes_by_household(dati, caseno,
  c(409, 422, 601, 603, 784,
    641,
    721, 722, 723, 655,
    631, 632, 634))
######################### leisure services

#X761,X762,X763,X764,X766,X767,X769
#X768
#X756,X757,X759
#X811,X812
#X821,X822

c1<-dati[dati[,2]==761 | dati[,2]==762 | dati[,2]==763 | dati[,2]==764 | dati[,2]==766 | dati[,2]==767 | dati[,2]==769 |
dati[,2]==768 | dati[,2]==756 | dati[,2]==757 | dati[,2]==759 | dati[,2]==811 | dati[,2]==812 | 
dati[,2]==821 | dati[,2]==822,]
c2<-rbind(c1[-1,],c(c1[nrow(c1),1]+1,0,0))
ind<-which((c2[,1]-c1[,1])!=0)
CN<-matrix(nrow=length(ind), ncol=2)
CN[,1]<-c1[ind]
CN[1,2]<- sum(c1[1:ind[1],3])
for (i in 2:length(ind)){
CN[i,2]<- sum(c1[(ind[i-1]+1):ind[i],3])
}
##
CAT<-matrix(nrow=length(caseno), ncol=2)
CAT[,1]<-caseno
mtc<-match(CN[,1],CAT[,1])
CAT[mtc,2]<-CN[,2]
leisure_s<-CAT #change this


# Assemble one row per household: id, age, household size, total expenditure
# and the 14 expenditure categories.  (An earlier cbind that was overwritten
# straight away has been removed.)
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M)<-c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
"food", "alcoholic_drink", "tobacco", "clothing_and_footwear" , "household_goods",
"household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
"leisure_services", "miscellaneous")
# Missing entries (e.g. no record in a category) are set to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec =".", sep =";")
P <- as.matrix(P)
# Fail loudly if the year is absent or duplicated instead of deflating by NA.
year_row <- which(P[, 1] == 1978)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1978", call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): by operator precedence only sum(pri[2:14]) is divided by 14;
# confirm this is the intended formula for the miscellaneous deflator.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One copy of the 15 deflators per household row, matching columns 4:18 of M.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md,".../original_data/dat1978def.csv" )

##############################################################################################

##################################################  1977 ################################
rm(list=ls())

# Household file for 1977, read once from the documented "datafes" folder.
# It used to be read twice, the first time from a machine-specific T:\ path.
hpc_cols <- c("caseno", "p378", "p356", "p377", "p367", "p369", "p370",
              "p371", "p372", "p368")
D0 <- read.dta("C:\\datafes\\1977\\hpc.dta")[, hpc_cols]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members; rows are aligned on caseno rather than assuming
# both files list the households in the same order, and a missing household
# now stops the run instead of only printing "ERROR"
D1 <- read.dta("C:\\datafes\\1977\\hcr.dta")[, c("caseno", "a049")]
hh_pos <- match(caseno, D1$caseno)
if (anyNA(hh_pos)) {
  stop("hcr.dta has no record for some households in hpc.dta", call. = FALSE)
}
nper <- D1$a049[hh_pos]

# expenditure totals taken directly from the household file
tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes: characters 2-5 of column 5 are converted to a
# number (presumably dropping a leading letter such as the X of the item
# codes -- confirm against the file)
E0 <- read.dta("C:\\datafes\\1977\\per.dta")
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0[,1] (used below as household id), column 2 = item code,
# column 3 = E0[,6] (used below as amount); sorted by column 1
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### category totals (helper)

# Sum the amounts of one expenditure category per household.
#   records  : matrix with the household id in column 1, the item code in
#              column 2 and the amount in column 3
#   codes    : item codes that make up the category
#   case_ids : household ids that define the rows of the result
# Returns a length(case_ids) x 2 matrix: column 1 holds case_ids, column 2 the
# household's total over `codes` (NA when it has no matching record).
# Replaces the former boundary-index loop, which broke when only one household
# matched (2:length(ind) counts down) or only one record matched (the
# selection collapsed to a vector).
category_total <- function(records, codes, case_ids) {
  sel <- records[records[, 2] %in% codes, , drop = FALSE]
  out <- matrix(NA_real_, nrow = length(case_ids), ncol = 2)
  out[, 1] <- case_ids
  if (nrow(sel) == 0) {
    return(out)
  }
  # reorder = FALSE keeps groups in order of first appearance, i.e. the order
  # of unique(), so ids and totals line up element by element.
  ids <- unique(sel[, 1])
  totals <- rowsum(sel[, 3], sel[, 1], reorder = FALSE)[, 1]
  pos <- match(ids, case_ids)
  if (anyNA(pos)) {
    stop("expenditure records refer to households missing from caseno",
         call. = FALSE)
  }
  out[pos, 2] <- totals
  out
}

######################### HOUSEHOLD GOODS
# NOTE(review): the comment that accompanied this selection listed X418, X419,
# X420 where the selection uses 718, 719, 720; the codes are kept unchanged
# here -- confirm against the codebook.
# NOTE(review): 788 is also part of household services below, so such records
# count towards both categories -- confirm this is intended.
household_g <- category_total(
  dati,
  c(401, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 718, 719, 720,
    421, 423, 786, 788, 431, 432, 644, 741, 746, 749, 623, 731, 732, 733,
    433, 434, 436),
  caseno
)
######################### HOUSEHOLD SERVICES
household_s <- category_total(
  dati,
  c(771, 790, 791, 792, 781, 782, 788, 789, 751, 752, 795, 796, 797, 770,
    799),
  caseno
)
######################### personal goods and services
personal_g_and_s <- category_total(
  dati,
  c(622, 624, 745, 642, 643, 621, 625, 773, 775, 611, 612, 793),
  caseno
)
######################### motoring
motoring <- category_total(
  dati,
  c(501, 502, 503, 504, 509, 510, 511, 547, 546, 549),
  caseno
)
######################### fares and other travel
fares_other_travel <- category_total(
  dati,
  c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559),
  caseno
)
######################### leisure goods
leisure_g <- category_total(
  dati,
  c(409, 422, 601, 603, 784, 641, 721, 722, 723, 655, 631, 632, 633, 634),
  caseno
)
######################### leisure services
leisure_s <- category_total(
  dati,
  c(761, 762, 763, 764, 765, 766, 767, 769, 768, 756, 757, 758, 759, 811,
    812, 814, 821, 822, 824),
  caseno
)


# Assemble one row per household: id, age, household size, total expenditure
# and the 14 expenditure categories.  (An earlier cbind that was overwritten
# straight away has been removed.)
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M)<-c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
"food", "alcoholic_drink", "tobacco", "clothing_and_footwear" , "household_goods",
"household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
"leisure_services", "miscellaneous")
# Missing entries (e.g. no record in a category) are set to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec =".", sep =";")
P <- as.matrix(P)
# Fail loudly if the year is absent or duplicated instead of deflating by NA.
year_row <- which(P[, 1] == 1977)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1977", call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): by operator precedence only sum(pri[2:14]) is divided by 14;
# confirm this is the intended formula for the miscellaneous deflator.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One copy of the 15 deflators per household row, matching columns 4:18 of M.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md,".../original_data/dat1977def.csv" )

##############################################################################################

##################################################  1976 ################################
rm(list=ls())

# Household file for 1976, read once from the documented "datafes" folder.
# It used to be read twice, the first time from a machine-specific T:\ path.
hpc_cols <- c("caseno", "p378", "p356", "p377", "p367", "p369", "p370",
              "p371", "p372", "p368")
D0 <- read.dta("C:\\datafes\\1976\\hpc.dta")[, hpc_cols]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members; rows are aligned on caseno rather than assuming
# both files list the households in the same order, and a missing household
# now stops the run instead of only printing "ERROR"
D1 <- read.dta("C:\\datafes\\1976\\hcr.dta")[, c("caseno", "a049")]
hh_pos <- match(caseno, D1$caseno)
if (anyNA(hh_pos)) {
  stop("hcr.dta has no record for some households in hpc.dta", call. = FALSE)
}
nper <- D1$a049[hh_pos]

# expenditure totals taken directly from the household file
tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

#########
# we find expenditure codes: characters 2-5 of column 5 are converted to a
# number (presumably dropping a leading letter such as the X of the item
# codes -- confirm against the file)
E0 <- read.dta("C:\\datafes\\1976\\per.dta")
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0[,1] (used below as household id), column 2 = item code,
# column 3 = E0[,6] (used below as amount); sorted by column 1
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### category totals (helper)

# Sum the amounts of one expenditure category per household.
#   records  : matrix with the household id in column 1, the item code in
#              column 2 and the amount in column 3
#   codes    : item codes that make up the category
#   case_ids : household ids that define the rows of the result
# Returns a length(case_ids) x 2 matrix: column 1 holds case_ids, column 2 the
# household's total over `codes` (NA when it has no matching record).
# Replaces the former boundary-index loop, which broke when only one household
# matched (2:length(ind) counts down) or only one record matched (the
# selection collapsed to a vector).
category_total <- function(records, codes, case_ids) {
  sel <- records[records[, 2] %in% codes, , drop = FALSE]
  out <- matrix(NA_real_, nrow = length(case_ids), ncol = 2)
  out[, 1] <- case_ids
  if (nrow(sel) == 0) {
    return(out)
  }
  # reorder = FALSE keeps groups in order of first appearance, i.e. the order
  # of unique(), so ids and totals line up element by element.
  ids <- unique(sel[, 1])
  totals <- rowsum(sel[, 3], sel[, 1], reorder = FALSE)[, 1]
  pos <- match(ids, case_ids)
  if (anyNA(pos)) {
    stop("expenditure records refer to households missing from caseno",
         call. = FALSE)
  }
  out[pos, 2] <- totals
  out
}

######################### HOUSEHOLD GOODS
# NOTE(review): the comment that accompanied this selection listed X418, X419,
# X420 where the selection uses 718, 719, 720; the codes are kept unchanged
# here -- confirm against the codebook.
# NOTE(review): 788 is also part of household services below, so such records
# count towards both categories -- confirm this is intended.
household_g <- category_total(
  dati,
  c(401, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 718, 719, 720,
    421, 423, 786, 788, 431, 432, 644, 741, 746, 749, 623, 731, 732, 733,
    433, 434, 436),
  caseno
)
######################### HOUSEHOLD SERVICES
household_s <- category_total(
  dati,
  c(771, 790, 791, 792, 781, 782, 788, 789, 751, 752, 795, 796, 797, 770,
    799),
  caseno
)
######################### personal goods and services
personal_g_and_s <- category_total(
  dati,
  c(622, 624, 745, 642, 643, 621, 625, 773, 775, 611, 612, 793),
  caseno
)
######################### motoring
motoring <- category_total(
  dati,
  c(501, 502, 503, 504, 509, 510, 511, 547, 546, 549),
  caseno
)
######################### fares and other travel
fares_other_travel <- category_total(
  dati,
  c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559),
  caseno
)
######################### leisure goods
leisure_g <- category_total(
  dati,
  c(409, 422, 601, 603, 784, 641, 721, 722, 723, 655, 631, 632, 633, 634),
  caseno
)
######################### leisure services
leisure_s <- category_total(
  dati,
  c(761, 762, 763, 764, 765, 766, 767, 769, 768, 756, 757, 758, 759, 811,
    812, 814, 821, 822, 824),
  caseno
)


# Assemble one row per household: id, age, household size, total expenditure
# and the 14 expenditure categories.  (An earlier cbind that was overwritten
# straight away has been removed.)
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M)<-c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
"food", "alcoholic_drink", "tobacco", "clothing_and_footwear" , "household_goods",
"household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
"leisure_services", "miscellaneous")
# Missing entries (e.g. no record in a category) are set to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec =".", sep =";")
P <- as.matrix(P)
# Fail loudly if the year is absent or duplicated instead of deflating by NA.
year_row <- which(P[, 1] == 1976)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1976", call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): by operator precedence only sum(pri[2:14]) is divided by 14;
# confirm this is the intended formula for the miscellaneous deflator.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One copy of the 15 deflators per household row, matching columns 4:18 of M.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md,".../original_data/dat1976def.csv" )

##############################################################################################

##################################################  1975 ################################
rm(list=ls())

# Household file for 1975, read once from the documented "datafes" folder.
# It used to be read twice, the first time from a machine-specific T:\ path.
hpc_cols <- c("caseno", "p378", "p356", "p377", "p367", "p369", "p370",
              "p371", "p372", "p368")
D0 <- read.dta("C:\\datafes\\1975\\hpc.dta")[, hpc_cols]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members; rows are aligned on caseno rather than assuming
# both files list the households in the same order, and a missing household
# now stops the run instead of only printing "ERROR"
D1 <- read.dta("C:\\datafes\\1975\\hcr.dta")[, c("caseno", "a049")]
hh_pos <- match(caseno, D1$caseno)
if (anyNA(hh_pos)) {
  stop("hcr.dta has no record for some households in hpc.dta", call. = FALSE)
}
nper <- D1$a049[hh_pos]

# expenditure totals taken directly from the household file
tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

########
# we find expenditure codes: characters 2-5 of column 5 are converted to a
# number (presumably dropping a leading letter such as the X of the item
# codes -- confirm against the file)
E0 <- read.dta("C:\\datafes\\1975\\per.dta")
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0[,1] (used below as household id), column 2 = item code,
# column 3 = E0[,6] (used below as amount); sorted by column 1
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### category totals (helper)

# Sum the amounts of one expenditure category per household.
#   records  : matrix with the household id in column 1, the item code in
#              column 2 and the amount in column 3
#   codes    : item codes that make up the category
#   case_ids : household ids that define the rows of the result
# Returns a length(case_ids) x 2 matrix: column 1 holds case_ids, column 2 the
# household's total over `codes` (NA when it has no matching record).
# Replaces the former boundary-index loop, which broke when only one household
# matched (2:length(ind) counts down) or only one record matched (the
# selection collapsed to a vector).
category_total <- function(records, codes, case_ids) {
  sel <- records[records[, 2] %in% codes, , drop = FALSE]
  out <- matrix(NA_real_, nrow = length(case_ids), ncol = 2)
  out[, 1] <- case_ids
  if (nrow(sel) == 0) {
    return(out)
  }
  # reorder = FALSE keeps groups in order of first appearance, i.e. the order
  # of unique(), so ids and totals line up element by element.
  ids <- unique(sel[, 1])
  totals <- rowsum(sel[, 3], sel[, 1], reorder = FALSE)[, 1]
  pos <- match(ids, case_ids)
  if (anyNA(pos)) {
    stop("expenditure records refer to households missing from caseno",
         call. = FALSE)
  }
  out[pos, 2] <- totals
  out
}

######################### HOUSEHOLD GOODS
# NOTE(review): the comment that accompanied this selection listed X418, X419,
# X420 where the selection uses 718, 719, 720; the codes are kept unchanged
# here -- confirm against the codebook.
# NOTE(review): 788 is also part of household services below, so such records
# count towards both categories -- confirm this is intended.
household_g <- category_total(
  dati,
  c(401, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 718, 719, 720,
    421, 423, 785, 788, 431, 432, 644, 741, 746, 749, 623, 731, 732, 733,
    433, 434, 436),
  caseno
)
######################### HOUSEHOLD SERVICES
household_s <- category_total(
  dati,
  c(771, 790, 791, 792, 781, 782, 788, 789, 751, 752, 795, 796, 797, 770,
    799),
  caseno
)
######################### personal goods and services
personal_g_and_s <- category_total(
  dati,
  c(622, 624, 745, 642, 643, 621, 625, 773, 775, 611, 612, 793),
  caseno
)
######################### motoring
motoring <- category_total(
  dati,
  c(501, 502, 503, 504, 509, 510, 511, 547, 546, 549),
  caseno
)
######################### fares and other travel
fares_other_travel <- category_total(
  dati,
  c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559),
  caseno
)
######################### leisure goods
leisure_g <- category_total(
  dati,
  c(409, 422, 601, 603, 784, 641, 721, 722, 723, 655, 631, 632, 633, 634),
  caseno
)
######################### leisure services
leisure_s <- category_total(
  dati,
  c(761, 762, 763, 764, 765, 766, 767, 769, 768, 756, 757, 758, 759, 811,
    812, 813, 814, 821, 822, 823, 824),
  caseno
)


# Assemble one row per household: id, age, household size, total expenditure
# and the 14 expenditure categories.  (An earlier cbind that was overwritten
# straight away has been removed.)
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M)<-c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
"food", "alcoholic_drink", "tobacco", "clothing_and_footwear" , "household_goods",
"household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
"leisure_services", "miscellaneous")
# Missing entries (e.g. no record in a category) are set to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec =".", sep =";")
P <- as.matrix(P)
# Fail loudly if the year is absent or duplicated instead of deflating by NA.
year_row <- which(P[, 1] == 1975)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1975", call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): by operator precedence only sum(pri[2:14]) is divided by 14;
# confirm this is the intended formula for the miscellaneous deflator.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# One copy of the 15 deflators per household row, matching columns 4:18 of M.
PM <- matrix(pri, nrow = nrow(M), ncol = 15, byrow = TRUE)
Md <- M
Md[, 4:18] <- M[, 4:18] / PM
########################################
write.csv(Md,".../original_data/dat1975def.csv" )

##############################################################################################

##################################################  1974 ################################
rm(list=ls())

# Household file for 1974, read once from the documented "datafes" folder.
# It used to be read twice, the first time from a machine-specific T:\ path.
hpc_cols <- c("caseno", "p378", "p356", "p377", "p367", "p369", "p370",
              "p371", "p372", "p368")
D0 <- read.dta("C:\\datafes\\1974\\hpc.dta")[, hpc_cols]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members; rows are aligned on caseno rather than assuming
# both files list the households in the same order, and a missing household
# now stops the run instead of only printing "ERROR"
D1 <- read.dta("C:\\datafes\\1974\\hcr.dta")[, c("caseno", "a049")]
hh_pos <- match(caseno, D1$caseno)
if (anyNA(hh_pos)) {
  stop("hcr.dta has no record for some households in hpc.dta", call. = FALSE)
}
nper <- D1$a049[hh_pos]

# expenditure totals taken directly from the household file
tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

########
# we find expenditure codes: characters 2-5 of column 5 are converted to a
# number (presumably dropping a leading letter such as the X of the item
# codes -- confirm against the file)
E0 <- read.dta("C:\\datafes\\1974\\per.dta")
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0[,1] (used below as household id), column 2 = item code,
# column 3 = E0[,6] (used below as amount); sorted by column 1
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### category totals (helper)

# Sum the amounts of one expenditure category per household.
#   records  : matrix with the household id in column 1, the item code in
#              column 2 and the amount in column 3
#   codes    : item codes that make up the category
#   case_ids : household ids that define the rows of the result
# Returns a length(case_ids) x 2 matrix: column 1 holds case_ids, column 2 the
# household's total over `codes` (NA when it has no matching record).
# Replaces the former boundary-index loop, which broke when only one household
# matched (2:length(ind) counts down) or only one record matched (the
# selection collapsed to a vector).
category_total <- function(records, codes, case_ids) {
  sel <- records[records[, 2] %in% codes, , drop = FALSE]
  out <- matrix(NA_real_, nrow = length(case_ids), ncol = 2)
  out[, 1] <- case_ids
  if (nrow(sel) == 0) {
    return(out)
  }
  # reorder = FALSE keeps groups in order of first appearance, i.e. the order
  # of unique(), so ids and totals line up element by element.
  ids <- unique(sel[, 1])
  totals <- rowsum(sel[, 3], sel[, 1], reorder = FALSE)[, 1]
  pos <- match(ids, case_ids)
  if (anyNA(pos)) {
    stop("expenditure records refer to households missing from caseno",
         call. = FALSE)
  }
  out[pos, 2] <- totals
  out
}

######################### HOUSEHOLD GOODS
# NOTE(review): the comment that accompanied this selection listed X418, X419,
# X420 where the selection uses 718, 719, 720; the codes are kept unchanged
# here -- confirm against the codebook.
# NOTE(review): 788 is also part of household services below, so such records
# count towards both categories -- confirm this is intended.
household_g <- category_total(
  dati,
  c(401, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 718, 719, 720,
    421, 423, 788, 431, 432, 644, 741, 746, 749, 623, 731, 732, 733, 433,
    434, 436),
  caseno
)
######################### HOUSEHOLD SERVICES
household_s <- category_total(
  dati,
  c(771, 790, 791, 792, 781, 782, 788, 789, 751, 752, 795, 796, 797, 770,
    799),
  caseno
)
######################### personal goods and services
personal_g_and_s <- category_total(
  dati,
  c(622, 624, 745, 642, 643, 621, 625, 773, 775, 611, 612, 793),
  caseno
)
######################### motoring
motoring <- category_total(
  dati,
  c(501, 502, 503, 504, 509, 510, 511, 547, 546, 549),
  caseno
)
######################### fares and other travel
fares_other_travel <- category_total(
  dati,
  c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559),
  caseno
)
######################### leisure goods
leisure_g <- category_total(
  dati,
  c(409, 422, 601, 603, 784, 641, 721, 722, 723, 655, 631, 632, 633, 634),
  caseno
)
######################### leisure services
leisure_s <- category_total(
  dati,
  c(761, 762, 763, 764, 765, 766, 767, 769, 768, 756, 757, 758, 759, 811,
    812, 813, 814, 821, 822, 823, 824),
  caseno
)


# Assemble the 1974 household table: identifiers and demographics, the
# expenditure groups available at household level, and the item-level
# aggregates (column 2 of each category matrix).
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
                 "fuel_light_power", "food", "alcoholic_drink", "tobacco",
                 "clothing_and_footwear", "household_goods",
                 "household_services", "personal_goods_and_services",
                 "motoring", "fares_and_other_travel", "leisure_goods",
                 "leisure_services", "miscellaneous")
# Every missing cell (e.g. a household with no record in a category) becomes 0.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Exactly one row of indices is expected for the year; anything else would
# silently produce NA or wrongly recycled deflators.
year_row <- which(P[, 1] == 1974)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1974",
       call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): this is pri[1] minus one fourteenth of the sum of the 13
# values pri[2:14]. If the miscellaneous deflator was meant to be the residual
# of an equally weighted 14-category index it would instead be
# 14 * pri[1] - sum(pri[2:14]). Formula left unchanged; confirm the intent.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
Md <- M
# Divide each of the 15 expenditure columns (4 to 18) by its own deflator.
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1974def.csv")

##############################################################################################

##################################################  1973 ################################
# Start the 1973 section from an empty workspace.
rm(list=ls())

# All 1973 raw files are read from the "datafes" folder described at the top
# of this file, and hpc.dta is read only once, so every household-level
# variable below comes from the same rows.
D0 <- read.dta("C:\\datafes\\1973\\hpc.dta")[, c("caseno", "p378", "p377",
  "p367", "p369", "p370", "p371", "p372", "p368", "p356")]

caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1973\\hcr.dta")[, c("caseno", "a049")]
# Align by household id rather than by row position, and refuse to go on if a
# household is missing (continuing would misalign nper with the other columns).
hcr_row <- match(caseno, D1$caseno)
if (anyNA(hcr_row)) {
  stop("1973: hcr.dta lacks households that are present in hpc.dta",
       call. = FALSE)
}
nper <- D1$a049[hcr_row]

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# Preliminary table; M is rebuilt once the item-level aggregates exist.
M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)
# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1973\\per.dta")
# Characters 2 to 5 of the item code as a number (the code lists in this file
# are written like X501).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (matched against caseno further down),
# column 2 = numeric item code, column 3 = E0 column 6 (the summed amount).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS
# Per-household household goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household services list,
# so those records count in both categories; confirm this is intended.

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X421,X423,X785,X788
#X431,X432,X644,X741
#X746,X749
#X623
#X731,X732
#X733
#X433,X434,X436

item_codes <- c(401, 402, 404, 405, 406, 407,
                411, 412, 413, 414, 415, 417, 421, 423, 785, 788,
                431, 432, 644, 741, 746, 749, 623, 731, 732, 733,
                433, 434, 436)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### HOUSEHOLD SERVICES
# Per-household household services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household goods list,
# so those records count in both categories; confirm this is intended.

#X771,X790,X791,X792,X781,X782,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

item_codes <- c(771, 790, 791, 792, 781, 782, 788, 789,
                751, 752, 795, 796, 797, 770, 799)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### personal goods and services
# Per-household total for personal goods and services: adds up the amounts
# (dati column 3) of all records whose item code (dati column 2) is listed
# below and lines the totals up with `caseno`. Result: column 1 = caseno,
# column 2 = total; households without any matching record keep NA in column 2.

#X622,X624,X745
#X642,X643
#X621,X625,X773,X775
#X611,X612
#X793

item_codes <- c(622, 624, 745, 642, 643, 621, 625, 773, 775, 611, 612, 793)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
personal_g_and_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                          deparse.level = 0)
######################### motoring
# Per-household motoring total: adds up the amounts (dati column 3) of all
# records whose item code (dati column 2) is listed below and lines the totals
# up with `caseno`. Result: column 1 = caseno, column 2 = total; households
# without any matching record keep NA in column 2.

#X501,X502,X503,X504
#X509,X510,X511
#X547
#X546,X549,

item_codes <- c(501, 502, 503, 504, 509, 510, 511, 547, 546, 549)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
motoring <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### fares and other travel
# Per-household total for fares and other travel: adds up the amounts (dati
# column 3) of all records whose item code (dati column 2) is listed below and
# lines the totals up with `caseno`. Result: column 1 = caseno, column 2 =
# total; households without any matching record keep NA in column 2.

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

item_codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
fares_other_travel <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                            deparse.level = 0)
######################### leisure goods
# Per-household leisure goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X409,X422,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

item_codes <- c(409, 422, 601, 603, 784, 641, 721, 722, 723, 655,
                631, 632, 633, 634)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### leisure services
# Per-household leisure services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X761,X762,X763,X764,X765,X766,X767,X769
#X768
#X756,X757,X758,X759
#X811,X812,X813,X814
#X821,X822,X823,X824

item_codes <- c(761, 762, 763, 764, 765, 766, 767, 769, 768,
                756, 757, 758, 759, 811, 812, 813, 814,
                821, 822, 823, 824)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)


# Assemble the 1973 household table: identifiers and demographics, the
# expenditure groups available at household level, and the item-level
# aggregates (column 2 of each category matrix).
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
                 "fuel_light_power", "food", "alcoholic_drink", "tobacco",
                 "clothing_and_footwear", "household_goods",
                 "household_services", "personal_goods_and_services",
                 "motoring", "fares_and_other_travel", "leisure_goods",
                 "leisure_services", "miscellaneous")
# Every missing cell (e.g. a household with no record in a category) becomes 0.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Exactly one row of indices is expected for the year; anything else would
# silently produce NA or wrongly recycled deflators.
year_row <- which(P[, 1] == 1973)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1973",
       call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): this is pri[1] minus one fourteenth of the sum of the 13
# values pri[2:14]. If the miscellaneous deflator was meant to be the residual
# of an equally weighted 14-category index it would instead be
# 14 * pri[1] - sum(pri[2:14]). Formula left unchanged; confirm the intent.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
Md <- M
# Divide each of the 15 expenditure columns (4 to 18) by its own deflator.
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1973def.csv")

##############################################################################################

##################################################  1972################################
# Start the 1972 section from an empty workspace.
rm(list=ls())

# All 1972 raw files are read from the "datafes" folder described at the top
# of this file, and hpc.dta is read only once, so every household-level
# variable below comes from the same rows.
D0 <- read.dta("C:\\datafes\\1972\\hpc.dta")[, c("caseno", "p378", "p377",
  "p367", "p369", "p370", "p371", "p372", "p368", "p356")]

caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1972\\hcr.dta")[, c("caseno", "a049")]
# Align by household id rather than by row position, and refuse to go on if a
# household is missing (continuing would misalign nper with the other columns).
hcr_row <- match(caseno, D1$caseno)
if (anyNA(hcr_row)) {
  stop("1972: hcr.dta lacks households that are present in hpc.dta",
       call. = FALSE)
}
nper <- D1$a049[hcr_row]

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# Preliminary table; M is rebuilt once the item-level aggregates exist.
M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)
#######
# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1972\\per.dta")
# Characters 2 to 5 of the item code as a number (the code lists in this file
# are written like X501).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (matched against caseno further down),
# column 2 = numeric item code, column 3 = E0 column 6 (the summed amount).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS
# Per-household household goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household services list,
# so those records count in both categories; confirm this is intended.

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X420,X421,X423,X427,X785,X788
#X431,X432,X644,X741
#X746,X749
#X623
#X731,X732
#X733
#X433,X434,X435,X436,X437,X438

item_codes <- c(401, 402, 404, 405, 406, 407,
                411, 412, 413, 414, 415, 417, 420, 421, 423, 427, 785, 788,
                431, 432, 644, 741, 746, 749, 623, 731, 732, 733,
                433, 434, 435, 436, 437, 438)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### HOUSEHOLD SERVICES
# Per-household household services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household goods list,
# so those records count in both categories; confirm this is intended.

#X771,X790,X791,X792,X781,X782,X783,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

item_codes <- c(771, 790, 791, 792, 781, 782, 783, 788, 789,
                751, 752, 795, 796, 797, 770, 799)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### personal goods and services
# Per-household total for personal goods and services: adds up the amounts
# (dati column 3) of all records whose item code (dati column 2) is listed
# below and lines the totals up with `caseno`. Result: column 1 = caseno,
# column 2 = total; households without any matching record keep NA in column 2.

#X622,X624,X745
#X642,X643
#X621,X625,X626,X773,X774,X775
#X611,X612
#X793

item_codes <- c(622, 624, 745, 642, 643, 621, 625, 626,
                773, 774, 775, 611, 612, 793)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
personal_g_and_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                          deparse.level = 0)
######################### motoring
# Per-household motoring total: adds up the amounts (dati column 3) of all
# records whose item code (dati column 2) is listed below and lines the totals
# up with `caseno`. Result: column 1 = caseno, column 2 = total; households
# without any matching record keep NA in column 2.

#X501,X502,X503,X504
#X509,X510
#X547
#X546,X549,

item_codes <- c(501, 502, 503, 504, 509, 510, 547, 546, 549)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
motoring <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### fares and other travel
# Per-household total for fares and other travel: adds up the amounts (dati
# column 3) of all records whose item code (dati column 2) is listed below and
# lines the totals up with `caseno`. Result: column 1 = caseno, column 2 =
# total; households without any matching record keep NA in column 2.

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

item_codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
fares_other_travel <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                            deparse.level = 0)
######################### leisure goods
# Per-household leisure goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X408,X409,X410,X422,X424,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

item_codes <- c(408, 409, 410, 422, 424, 601, 603, 784, 641,
                721, 722, 723, 655, 631, 632, 633, 634)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### leisure services
# Per-household leisure services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X761,X762,X763,X764,X765,X766,X767,X769
#X768
#X757,X776,X777,X778
#X811,X812,X813,X814
#X821,X822,X823,X824

item_codes <- c(761, 762, 763, 764, 765, 766, 767, 769, 768,
                757, 776, 777, 778, 811, 812, 813, 814,
                821, 822, 823, 824)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)


# Assemble the 1972 household table: identifiers and demographics, the
# expenditure groups available at household level, and the item-level
# aggregates (column 2 of each category matrix).
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
                 "fuel_light_power", "food", "alcoholic_drink", "tobacco",
                 "clothing_and_footwear", "household_goods",
                 "household_services", "personal_goods_and_services",
                 "motoring", "fares_and_other_travel", "leisure_goods",
                 "leisure_services", "miscellaneous")
# Every missing cell (e.g. a household with no record in a category) becomes 0.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Exactly one row of indices is expected for the year; anything else would
# silently produce NA or wrongly recycled deflators.
year_row <- which(P[, 1] == 1972)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1972",
       call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): this is pri[1] minus one fourteenth of the sum of the 13
# values pri[2:14]. If the miscellaneous deflator was meant to be the residual
# of an equally weighted 14-category index it would instead be
# 14 * pri[1] - sum(pri[2:14]). Formula left unchanged; confirm the intent.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
Md <- M
# Divide each of the 15 expenditure columns (4 to 18) by its own deflator.
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1972def.csv")

##############################################################################################

##################################################  1971 ################################
# Start the 1971 section from an empty workspace.
rm(list=ls())

# All 1971 raw files are read from the "datafes" folder described at the top
# of this file, and hpc.dta is read only once, so every household-level
# variable below comes from the same rows.
D0 <- read.dta("C:\\datafes\\1971\\hpc.dta")[, c("caseno", "p378", "p377",
  "p367", "p369", "p370", "p371", "p372", "p368", "p356")]

caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1971\\hcr.dta")[, c("caseno", "a049")]
# Align by household id rather than by row position, and refuse to go on if a
# household is missing (continuing would misalign nper with the other columns).
hcr_row <- match(caseno, D1$caseno)
if (anyNA(hcr_row)) {
  stop("1971: hcr.dta lacks households that are present in hpc.dta",
       call. = FALSE)
}
nper <- D1$a049[hcr_row]
#######

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# Preliminary table; M is rebuilt once the item-level aggregates exist.
M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)
# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1971\\per.dta")
# Characters 2 to 5 of the item code as a number (the code lists in this file
# are written like X501).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (matched against caseno further down),
# column 2 = numeric item code, column 3 = E0 column 6 (the summed amount).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS
# Per-household household goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household services list,
# so those records count in both categories; confirm this is intended.

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X420,X421,X423,X427,X785,X788
#X431,X432,X644,X741
#X742,X749
#X623
#X731,X732
#X733
#X433,X434,X435,X436,X437,X438

item_codes <- c(401, 402, 404, 405, 406, 407,
                411, 412, 413, 414, 415, 417, 420, 421, 423, 427, 785, 788,
                431, 432, 644, 741, 742, 749, 623, 731, 732, 733,
                433, 434, 435, 436, 437, 438)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### HOUSEHOLD SERVICES
# Per-household household services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household goods list,
# so those records count in both categories; confirm this is intended.

#X771,X790,X791,X792,X781,X782,X783,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

item_codes <- c(771, 790, 791, 792, 781, 782, 783, 788, 789,
                751, 752, 795, 796, 797, 770, 799)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### personal goods and services
# Per-household total for personal goods and services: adds up the amounts
# (dati column 3) of all records whose item code (dati column 2) is listed
# below and lines the totals up with `caseno`. Result: column 1 = caseno,
# column 2 = total; households without any matching record keep NA in column 2.

#X622,X624
#X642,X643
#X621,X625,X626,X773,X774,X775
#X611,X612
#X793

item_codes <- c(622, 624, 642, 643, 621, 625, 626,
                773, 774, 775, 611, 612, 793)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
personal_g_and_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                          deparse.level = 0)
######################### motoring
# Per-household motoring total: adds up the amounts (dati column 3) of all
# records whose item code (dati column 2) is listed below and lines the totals
# up with `caseno`. Result: column 1 = caseno, column 2 = total; households
# without any matching record keep NA in column 2.

#X501,X502,X503,X504
#X509,X510
#X547
#X546,X549,

item_codes <- c(501, 502, 503, 504, 509, 510, 547, 546, 549)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
motoring <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### fares and other travel
# Per-household total for fares and other travel: adds up the amounts (dati
# column 3) of all records whose item code (dati column 2) is listed below and
# lines the totals up with `caseno`. Result: column 1 = caseno, column 2 =
# total; households without any matching record keep NA in column 2.

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

item_codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
fares_other_travel <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                            deparse.level = 0)
######################### leisure goods
# Per-household leisure goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X408,X409,X410,X422,X424,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

item_codes <- c(408, 409, 410, 422, 424, 601, 603, 784, 641,
                721, 722, 723, 655, 631, 632, 633, 634)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### leisure services
# Per-household leisure services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X761,X762,X763,X764,X765,X766,X767,X769
#X768
#X776,X777,X778
#X811,X812,X813,X814
#X821,X822,X823,X824

item_codes <- c(761, 762, 763, 764, 765, 766, 767, 769, 768,
                776, 777, 778, 811, 812, 813, 814,
                821, 822, 823, 824)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)


# Assemble the 1971 household table: identifiers and demographics, the
# expenditure groups available at household level, and the item-level
# aggregates (column 2 of each category matrix).
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
           household_g[, 2], household_s[, 2], personal_g_and_s[, 2],
           motoring[, 2], fares_other_travel[, 2], leisure_g[, 2],
           leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net",
                 "fuel_light_power", "food", "alcoholic_drink", "tobacco",
                 "clothing_and_footwear", "household_goods",
                 "household_services", "personal_goods_and_services",
                 "motoring", "fares_and_other_travel", "leisure_goods",
                 "leisure_services", "miscellaneous")
# Every missing cell (e.g. a household with no record in a category) becomes 0.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Exactly one row of indices is expected for the year; anything else would
# silently produce NA or wrongly recycled deflators.
year_row <- which(P[, 1] == 1971)
if (length(year_row) != 1) {
  stop("price_indices.csv must contain exactly one row for 1971",
       call. = FALSE)
}
pri <- P[year_row, 2:15] / 100
# NOTE(review): this is pri[1] minus one fourteenth of the sum of the 13
# values pri[2:14]. If the miscellaneous deflator was meant to be the residual
# of an equally weighted 14-category index it would instead be
# 14 * pri[1] - sum(pri[2:14]). Formula left unchanged; confirm the intent.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
Md <- M
# Divide each of the 15 expenditure columns (4 to 18) by its own deflator.
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1971def.csv")

##############################################################################################

##################################################  1970 ################################
# Start the 1970 section from an empty workspace.
rm(list=ls())

# All 1970 raw files are read from the "datafes" folder described at the top
# of this file, and hpc.dta is read only once, so every household-level
# variable below comes from the same rows.
D0 <- read.dta("C:\\datafes\\1970\\hpc.dta")[, c("caseno", "p378", "p377",
  "p367", "p369", "p370", "p371", "p372", "p368", "p356")]

caseno <- D0$caseno
totc <- D0$p378
#
# we find the age of the head of the family
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1970\\hcr.dta")[, c("caseno", "a049")]
# Align by household id rather than by row position, and refuse to go on if a
# household is missing (continuing would misalign nper with the other columns).
hcr_row <- match(caseno, D1$caseno)
if (anyNA(hcr_row)) {
  stop("1970: hcr.dta lacks households that are present in hpc.dta",
       call. = FALSE)
}
nper <- D1$a049[hcr_row]
########

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# Preliminary table; M is rebuilt once the item-level aggregates exist.
M <- cbind(caseno, tot_exp, hous, fuel, food, alc, tob, clot, misc)
# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1970\\per.dta")
# Characters 2 to 5 of the item code as a number (the code lists in this file
# are written like X501).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 = E0 column 1 (matched against caseno further down),
# column 2 = numeric item code, column 3 = E0 column 6 (the summed amount).
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS
# Per-household household goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household services list,
# so those records count in both categories; confirm this is intended.

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X420,X421,X423,X427,X785,X788
#X431,X432,X644,X741
#X742,X749
#X623
#X731,X732
#X733
#X433,X434,X435,X436,X437,X438

item_codes <- c(401, 402, 404, 405, 406, 407,
                411, 412, 413, 414, 415, 417, 420, 421, 423, 427, 785, 788,
                431, 432, 644, 741, 742, 749, 623, 731, 732, 733,
                433, 434, 435, 436, 437, 438)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### HOUSEHOLD SERVICES
# Per-household household services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.
# NOTE(review): item code 788 is also in this year's household goods list,
# so those records count in both categories; confirm this is intended.

#X771,X790,X791,X792,X781,X782,X783,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

item_codes <- c(771, 790, 791, 792, 781, 782, 783, 788, 789,
                751, 752, 795, 796, 797, 770, 799)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
household_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### personal goods and services
# Per-household total for personal goods and services: adds up the amounts
# (dati column 3) of all records whose item code (dati column 2) is listed
# below and lines the totals up with `caseno`. Result: column 1 = caseno,
# column 2 = total; households without any matching record keep NA in column 2.

#X622,X624
#X642,X643
#X621,X625,X626,X773,X774,X775
#X611,X612
#X793

item_codes <- c(622, 624, 642, 643, 621, 625, 626,
                773, 774, 775, 611, 612, 793)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
personal_g_and_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                          deparse.level = 0)
######################### motoring
# Per-household motoring total: adds up the amounts (dati column 3) of all
# records whose item code (dati column 2) is listed below and lines the totals
# up with `caseno`. Result: column 1 = caseno, column 2 = total; households
# without any matching record keep NA in column 2.

#X501,X502,X503,X504
#X509,X510
#X547
#X546,X549,

item_codes <- c(501, 502, 503, 504, 509, 510, 547, 546, 549)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
motoring <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### fares and other travel
# Per-household total for fares and other travel: adds up the amounts (dati
# column 3) of all records whose item code (dati column 2) is listed below and
# lines the totals up with `caseno`. Result: column 1 = caseno, column 2 =
# total; households without any matching record keep NA in column 2.

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

item_codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
fares_other_travel <- cbind(caseno, hh_sum[match(caseno, hh_ids)],
                            deparse.level = 0)
######################### leisure goods
# Per-household leisure goods total: adds up the amounts (dati column 3) of
# all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X408,X409,X410,X422,X424,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

item_codes <- c(408, 409, 410, 422, 424, 601, 603, 784, 641,
                721, 722, 723, 655, 631, 632, 633, 634)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_g <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)
######################### leisure services
# Per-household leisure services total: adds up the amounts (dati column 3)
# of all records whose item code (dati column 2) is listed below and lines the
# totals up with `caseno`. Result: column 1 = caseno, column 2 = total;
# households without any matching record keep NA in column 2.

#X761,X762,X763,X764,X765,X766,X767,X769
#X768
#X776,X777,X778
#X811,X812,X813,X814
#X821,X822,X823,X824

item_codes <- c(761, 762, 763, 764, 765, 766, 767, 769, 768,
                776, 777, 778, 811, 812, 813, 814,
                821, 822, 823, 824)
# %in% is never NA, and drop = FALSE keeps a matrix even for 0 or 1 records.
picked <- dati[dati[, 2] %in% item_codes, , drop = FALSE]
# rowsum(reorder = FALSE) returns groups in order of first appearance,
# i.e. in the same order as unique().
hh_ids <- unique(picked[, 1])
hh_sum <- as.vector(rowsum(picked[, 3], picked[, 1], reorder = FALSE))
leisure_s <- cbind(caseno, hh_sum[match(caseno, hh_ids)], deparse.level = 0)


# Assemble the household-level table for 1970: caseno, age and nper in
# columns 1-3, total expenditure in column 4 and the fourteen expenditure
# categories in columns 5-18.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
  "food", "alcoholic_drink", "tobacco", "clothing_and_footwear", "household_goods",
  "household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
  "leisure_services", "miscellaneous")
# Every missing entry (e.g. a category with no recorded expenditure) is set
# to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Row of the price table for this year: columns 2-15, rescaled by 1/100.
pri <- P[P[, 1] == 1970, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1970", call. = FALSE)
}
# Price used for "miscellaneous": the first index minus one fourteenth of the
# sum of the other thirteen.
# NOTE(review): only the sum is divided by 14 here; confirm that this is the
# intended residual price before relying on the deflated miscellaneous column.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the columns 4-18 by its own price.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1970def.csv")



##############################################################################################

##################################################  1969 ################################
# Start the 1969 section from an empty workspace so that no object of the
# previous section is picked up by accident.
rm(list=ls())

# Household file: read once, from the documented "datafes" location, with
# every column this section needs.
D0 <- read.dta("C:\\datafes\\1969\\hpc.dta")[, c("caseno", "p378", "p356",
  "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1969\\hcr.dta")[, c("caseno", "a049")]
# Pair the two files on caseno instead of on a running counter, so a household
# that is missing from, or ordered differently in, hcr.dta cannot shift the
# values of the households after it. Households absent from hcr.dta get NA.
nper <- D1$a049[match(caseno, D1$caseno)]
#######

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1969\\per.dta")
# Numeric code: characters 2-5 of column 5 (the first character is dropped;
# presumably the "X" prefix used in the code lists below - confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 is matched against caseno further down, column 2 is the
# numeric code and column 3 is the amount that gets summed per category.
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X420,X421,X423,X427,X785,X788
#X431,X432,X644,X741
#X742,X749
#X623
#X731,X732
#X733
#X433,X434,X435,X436,X437,X438

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the household goods codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(401, 402, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 420,
  421, 423, 427, 785, 788, 431, 432, 644, 741, 742, 749, 623, 731, 732, 733,
  433, 434, 435, 436, 437, 438)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
household_g <- CAT
######################### HOUSEHOLD SERVICES

#X771,X772,X790,X791,X792,X782,X783,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the household services codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
# NOTE(review): code 788 is also selected for household goods in this section,
# so its expenditure enters both categories - confirm this is intended.
codes <- c(771, 772, 790, 791, 792, 782, 783, 788, 789, 751, 752, 795, 796,
  797, 770, 799)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
household_s <- CAT
######################### personal goods and services

#X622,X624
#X642,X643
#X621,X773,X774,X775,X780
#X611,X612
#X793

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the personal goods and services codes listed
# above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(622, 624, 642, 643, 621, 773, 774, 775, 780, 611, 612, 793)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
personal_g_and_s <- CAT
######################### motoring

#X501,X502,X503,X504
#X509,X510
#X547
#X546,X549

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the motoring codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(501, 502, 503, 504, 509, 510, 547, 546, 549)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
motoring <- CAT
######################### fares and other travel

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the fares and other travel codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
fares_other_travel <- CAT
######################### leisure goods

#X408,X409,X410,X422,X424,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the leisure goods codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(408, 409, 410, 422, 424, 601, 603, 784, 641, 721, 722, 723, 655,
  631, 632, 633, 634)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
leisure_g <- CAT
######################### leisure services

#X761,X762,X763,X764,X765,X766,X767,X769
#X768
#X776,X777,X778
#X811,X812,X813,X814
#X821,X822,X823,X824

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the leisure services codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
# NOTE(review): code 765 is selected here although the code list of this
# section did not originally name it, and the 1968 section leaves it out -
# confirm which of the two is intended for 1969.
codes <- c(761, 762, 763, 764, 765, 766, 767, 769, 768, 776, 777, 778,
  811, 812, 813, 814, 821, 822, 823, 824)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
leisure_s <- CAT


# Assemble the household-level table for 1969: caseno, age and nper in
# columns 1-3, total expenditure in column 4 and the fourteen expenditure
# categories in columns 5-18.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
  "food", "alcoholic_drink", "tobacco", "clothing_and_footwear", "household_goods",
  "household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
  "leisure_services", "miscellaneous")
# Every missing entry (e.g. a category with no recorded expenditure) is set
# to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Row of the price table for this year: columns 2-15, rescaled by 1/100.
pri <- P[P[, 1] == 1969, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1969", call. = FALSE)
}
# Price used for "miscellaneous": the first index minus one fourteenth of the
# sum of the other thirteen.
# NOTE(review): only the sum is divided by 14 here; confirm that this is the
# intended residual price before relying on the deflated miscellaneous column.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the columns 4-18 by its own price.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1969def.csv")

##############################################################################################

##################################################  1968 ################################
# Start the 1968 section from an empty workspace so that no object of the
# previous section is picked up by accident.
rm(list=ls())

# Household file: read once, from the documented "datafes" location, with
# every column this section needs.
D0 <- read.dta("C:\\datafes\\1968\\hpc.dta")[, c("caseno", "p378", "p356",
  "p377", "p367", "p369", "p370", "p371", "p372", "p368")]
caseno <- D0$caseno
totc <- D0$p378
# we find the age of the head of the family (p356 divided by 10)
age <- D0$p356 / 10
# we find number of members
D1 <- read.dta("C:\\datafes\\1968\\hcr.dta")[, c("caseno", "a049")]
# When both files list the same households in the same order a049 is used as
# it is; otherwise it is paired with hpc.dta on caseno (NA for households
# missing from hcr.dta) and a warning is raised, instead of carrying on with
# values attached to the wrong households.
aligned <- length(D1$caseno) == length(caseno) &&
  isTRUE(all(D1$caseno == caseno))
if (aligned) {
  nper <- D1$a049
} else {
  warning("1968: hpc.dta and hcr.dta do not list the same households in the same order; nper was aligned by caseno",
    call. = FALSE)
  nper <- D1$a049[match(caseno, D1$caseno)]
}
#######

tot_exp <- D0$p378
hous <- D0$p367
fuel <- D0$p368
food <- D0$p369
alc <- D0$p370
tob <- D0$p371
clot <- D0$p372
misc <- D0$p377

# we find expenditure codes
E0 <- read.dta("C:\\datafes\\1968\\per.dta")
# Numeric code: characters 2-5 of column 5 (the first character is dropped;
# presumably the "X" prefix used in the code lists below - confirm).
colcod <- as.numeric(substring(E0[, 5], 2, 5))
# dati: column 1 is matched against caseno further down, column 2 is the
# numeric code and column 3 is the amount that gets summed per category.
dati <- cbind(E0[, 1], colcod, E0[, 6])
dati <- dati[order(dati[, 1]), ]
######################### HOUSEHOLD GOODS

#X401,X402,X404,X405,X406,X407
#X411,X412,X413,X414,X415,X417,X420,X421,X423,X427,X785,X788
#X431,X432,X644,X741
#X742,X749
#X623
#X731,X732
#X733
#X433,X434,X435,X436,X437,X438

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the household goods codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(401, 402, 404, 405, 406, 407, 411, 412, 413, 414, 415, 417, 420,
  421, 423, 427, 785, 788, 431, 432, 644, 741, 742, 749, 623, 731, 732, 733,
  433, 434, 435, 436, 437, 438)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
household_g <- CAT
######################### HOUSEHOLD SERVICES

#X771,X772,X790,X791,X792,X782,X783,X788,X789
#X751,X752
#X795,X796,X797
#X770
#X799

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the household services codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
# NOTE(review): code 788 is also selected for household goods in this section,
# so its expenditure enters both categories - confirm this is intended.
codes <- c(771, 772, 790, 791, 792, 782, 783, 788, 789, 751, 752, 795, 796,
  797, 770, 799)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
household_s <- CAT
######################### personal goods and services

#X622,X624
#X642,X643
#X621,X773,X774,X775,X780
#X611,X612
#X793

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the personal goods and services codes listed
# above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(622, 624, 642, 643, 621, 773, 774, 775, 780, 611, 612, 793)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
personal_g_and_s <- CAT
######################### motoring

#X501,X502,X503,X504
#X509,X510
#X547
#X546,X549

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the motoring codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(501, 502, 503, 504, 509, 510, 547, 546, 549)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
motoring <- CAT
######################### fares and other travel

#(X505,X506)
#X551,X552,X553,X554
#X555,X556,X557,X559

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the fares and other travel codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(505, 506, 551, 552, 553, 554, 555, 556, 557, 559)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
fares_other_travel <- CAT
######################### leisure goods

#X408,X409,X410,X422,X424,X601,X603,X784
#X641
#X721,X722,X723,X655
#X631,X632,X633,X634

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the leisure goods codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
codes <- c(408, 409, 410, 422, 424, 601, 603, 784, 641, 721, 722, 723, 655,
  631, 632, 633, 634)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
leisure_g <- CAT
######################### leisure services

#X761,X762,X763,X764,X766,X767,X769
#X768
#X776,X777,X778
#X811,X812,X813,X814
#X821,X822,X823,X824

# Per-household total of the expenditure records (column 3 of dati) whose
# code (column 2) is one of the leisure services codes listed above.
# Result: a two-column matrix with caseno in column 1 and the total in
# column 2; households without any such record keep NA in column 2.
# NOTE(review): code 765 is not selected here, while the 1969 section of this
# file does select it - confirm that leaving it out is intended for 1968.
codes <- c(761, 762, 763, 764, 766, 767, 769, 768, 776, 777, 778,
  811, 812, 813, 814, 821, 822, 823, 824)
# %in% treats an NA code as "no match", and drop = FALSE keeps a matrix even
# when only one record is selected.
c1 <- dati[dati[, 2] %in% codes, , drop = FALSE]
CAT <- matrix(nrow = length(caseno), ncol = 2)
CAT[, 1] <- caseno
if (nrow(c1) > 0) {
  # Group the records by the row of CAT they belong to (first match on
  # caseno); records whose caseno is absent from caseno are left out.
  row_id <- match(c1[, 1], caseno)
  totals <- tapply(c1[, 3], row_id, sum)
  CAT[as.integer(names(totals)), 2] <- totals
}
leisure_s <- CAT


# Assemble the household-level table for 1968: caseno, age and nper in
# columns 1-3, total expenditure in column 4 and the fourteen expenditure
# categories in columns 5-18.
M <- cbind(caseno, age, nper, tot_exp, hous, fuel, food, alc, tob, clot,
  household_g[, 2], household_s[, 2], personal_g_and_s[, 2], motoring[, 2],
  fares_other_travel[, 2], leisure_g[, 2], leisure_s[, 2], misc)
colnames(M) <- c("caseno", "age", "nper", "total_expenditure", "housing_net", "fuel_light_power",
  "food", "alcoholic_drink", "tobacco", "clothing_and_footwear", "household_goods",
  "household_services", "personal_goods_and_services", "motoring", "fares_and_other_travel", "leisure_goods",
  "leisure_services", "miscellaneous")
# Every missing entry (e.g. a category with no recorded expenditure) is set
# to zero.
M[is.na(M)] <- 0
#### price deflation#################
P <- read.csv(".../price_indices.csv", dec = ".", sep = ";")
P <- as.matrix(P)
# Row of the price table for this year: columns 2-15, rescaled by 1/100.
pri <- P[P[, 1] == 1968, 2:15] / 100
if (length(pri) != 14) {
  stop("price_indices.csv must contain exactly one row for 1968", call. = FALSE)
}
# Price used for "miscellaneous": the first index minus one fourteenth of the
# sum of the other thirteen.
# NOTE(review): only the sum is divided by 14 here; confirm that this is the
# intended residual price before relying on the deflated miscellaneous column.
pri_misc <- pri[1] - sum(pri[2:14]) / 14
pri <- c(pri, pri_misc)
names(pri)[15] <- "misc"
# Divide each of the columns 4-18 by its own price.
Md <- M
Md[, 4:18] <- sweep(M[, 4:18], 2, pri, "/")
########################################
write.csv(Md, ".../original_data/dat1968def.csv")
