function load_data_monthly_FINAL()
% LOAD_DATA_MONTHLY_FINAL  Load and prepare the monthly data set.
%
% Reads two CSV files (located by csvread, i.e. current folder or path):
%   'PredictorData2010.csv'                        - Goyal-Welch data
%   'Post2Web PayoutPaperDataTS 23 Sep 2011.csv'   - Michael R. Roberts data
%       on net payout (dividends plus equity repurchases less equity
%       issuances)
% builds the excess-return series and 15 predictor series, drops all rows
% with year < 1926, and writes the result to
%   <pwd>/Temp/Data for modeling (S&P 500 monthly).csv
% with one header row followed by one numeric row per month.
%
% The function takes no inputs and returns no outputs; its only effect is
% the file written to disk (the Temp folder is created if missing).

%% Load the raw data (skip the header row; -999.99 marks missing values)
DATA = csvread('PredictorData2010.csv', 1, 0);
DATA(DATA == -999.99) = NaN;

DATA_Roberts = csvread('Post2Web PayoutPaperDataTS 23 Sep 2011.csv', 1, 0);
DATA_Roberts(DATA_Roberts == -999.99) = NaN;

%% Set up dates
% Column 1 is decoded as yyyymm. mod() extracts the month exactly, without
% the floating-point fudge factor that a fractional-part computation needs.
yyyymm = DATA(:,1);
year   = floor(yyyymm/100);
month  = mod(yyyymm, 100);

%% Name the columns that are used below
% (columns 16 and 18 of the file are not used by this function)
Index   = DATA(:,2);
Div     = DATA(:,3);
Ern     = DATA(:,4);
BtoM    = DATA(:,5);
Tbill   = DATA(:,6);
AAA     = DATA(:,7);
BAA     = DATA(:,8);
LTY     = DATA(:,9);
Ntis    = DATA(:,10);
Rfree   = DATA(:,11);
Infl    = DATA(:,12);
LTR     = DATA(:,13);
Corpr   = DATA(:,14);
Svar    = DATA(:,15);
CRSP_vw = DATA(:,17);

T = size(DATA, 1);

%% LHS: excess return
% Longer sample return variable: index-based return (price change plus 1/12
% of Div) before 1926, CRSP value-weighted return from 1926 on.
% Excess returns are the difference between the continuously compounded
% return and the continuously compounded risk free rate, where the risk
% free rate is lagged once to mimic the situation where it is known at
% time t (this is also what Campbell and Thompson (2008) did).
R_index = [NaN; (Index(2:end) + (1/12)*Div(2:end) - Index(1:end-1)) ./ Index(1:end-1)];
R_index_CT = R_index;
is_crsp = (year >= 1926);
R_index_CT(is_crsp) = CRSP_vw(is_crsp);
long_sample_er = [NaN; log(1 + R_index_CT(2:end)) - log(1 + Rfree(1:end-1))];

%% Predictors

% (1) Log dividend yield: dividends over the LAGGED index level.
% The first observation has no lag; 100 is a placeholder value.
% NOTE(review): harmless only if the first row is dropped by the 1926 cut
% below (i.e. the input starts before 1926) - confirm against the data.
lagind = [100; Index(1:end-1)];
dy = log(Div ./ lagind);

% (2) Log earnings-price ratio
lep = log(Ern ./ Index);

% (3) Log smooth earnings-price ratio: trailing 120-observation mean of
% earnings over the current index; undefined (NaN) for the first 119 rows.
Ernsmooth = NaN(T, 1);
for k = 120:T
    Ernsmooth(k) = mean(Ern(k-119:k));
end
lepsmooth = log(Ernsmooth ./ Index);

% (4) Log dividend-payout ratio
payout = log(Div ./ Ern);

% (9) Term spread
tms = LTY - Tbill;

% (10) Default yield spread
dfy = BAA - AAA;

% (11) Default return spread
dfr = Corpr - LTR;

% (14) Inflation
% Fix outlier at row 907: replace with the average of its two neighbours.
% NOTE(review): the row index is hard-coded and therefore tied to the first
% date in PredictorData2010.csv; the original comment describes this as
% "1946.6, replace with average of 1946.5 & 1946.7" - confirm the row.
Infl(907) = (Infl(906) + Infl(908))/2;
% Inflation data are released in the following month, so lag the series
% by one observation.
Infl = [NaN; Infl(1:end-1)];

% (15) Log total net payout yield, matched from the Roberts file on
% (year, month) = (column 1, column 2); the value is taken from column 10.
% Months without a match stay NaN.
ldrp = NaN(T, 1);
for j = 1:T
    hit = find(DATA_Roberts(:,1) == year(j) & DATA_Roberts(:,2) == month(j));
    if numel(hit) > 1
        error('load_data_monthly_FINAL:duplicateRobertsRow', ...
              'Roberts data contain %d rows for year %d, month %d.', ...
              numel(hit), year(j), month(j));
    elseif ~isempty(hit)
        ldrp(j) = DATA_Roberts(hit, 10);
    end
end

% Assemble predictors; column order matches Predictor_names.
predictormat = [dy, lep, lepsmooth, payout, BtoM, Tbill, LTY, LTR, ...
                tms, dfy, dfr, Svar, Ntis, Infl, ldrp];
Predictor_names = { ...
    'Log dividend yield'; ...               % Log(DY)
    'Log earning price ratio'; ...          % Log(EP)
    'Log smooth earning price ratio'; ...   % Log(Smooth EP)
    'Log dividend-payout ratio'; ...        % Log(DE)
    'Book-to-market ratio'; ...             % BM
    'T-Bill rate'; ...                      % TBL
    'Long-term yield'; ...                  % LTY
    'Long-term return'; ...                 % LTR
    'Term spread'; ...                      % TMS
    'Default yield spread'; ...             % DFY
    'Default return spread'; ...            % DFR
    'Stock variance'; ...                   % SVAR
    'Net equity expansion'; ...             % NTIS
    'Inflation'; ...                        % INFL
    'Log total net payout yield'};          % Log(NPY)

%% Keep only data from 1926:01 onward (restricting to CRSP period only)
pre_crsp = (year < 1926);
year(pre_crsp)           = [];
month(pre_crsp)          = [];
long_sample_er(pre_crsp) = [];
predictormat(pre_crsp,:) = [];

%% Save data to CSV file
% fullfile keeps the path valid on every platform (a literal '\' is only a
% path separator on Windows).
out_dir = fullfile(pwd, 'Temp');
if ~exist(out_dir, 'dir')
    mkdir(out_dir);
end
out_file = fullfile(out_dir, 'Data for modeling (S&P 500 monthly).csv');

% Dates are written as day counts relative to 30-Dec-1899.
serial_dates = datenum(year, month, 1) - datenum('30-Dec-1899');
header   = [{'Dates'}, {'Excess return (CRSP)'}, Predictor_names'];
out_data = [serial_dates, long_sample_er, predictormat];

% Header row first ...
[fid, msg] = fopen(out_file, 'w');
if fid == -1
    error('load_data_monthly_FINAL:cannotOpenOutput', ...
          'Could not open "%s" for writing: %s', out_file, msg);
end
fprintf(fid, '%s,', header{1:end-1});
fprintf(fid, '%s\n', header{end});
fclose(fid);

% ... then the numeric rows. An explicit precision is required: dlmwrite
% otherwise writes only 5 significant digits and truncates the data.
dlmwrite(out_file, out_data, '-append', 'precision', '%.15g');

