**============================================================================**
* Jianhao Lin, Jiacheng Fan, Yifan Zhang, Liangyuan Chen, "Real-time 
*   Macroeconomic Projection Using Narrative Central Bank Communication", 
*   Journal of Applied Econometrics, forthcoming
**----------------------------------------------------------------------------**

* Replicates Figures 2 - 5 in the Paper.

**----------------------------------------------------------------------------**
* Jiacheng Fan, 2022 (fanjch7@mail2.sysu.edu.cn)
**============================================================================**
version 16
* Start from a clean session: close any open log and clear data, matrices,
* user-defined programs and Mata objects left over from earlier runs.
capture log close
clear
clear matrix
program drop _all
clear mata
set more off
pause off

* Adjust this path to where you stored the files
global path = "...\lin-fan-zhang-chen-files"

* Set up paths for reading data and storing results
global datpath = "$path\Data\"
global figpath = "$path\Results\"

* The path is quoted so that a directory name containing spaces still works
cd "$path"


*===============================================================================
* FIGURE 2 Frequency of PBC oral communication events.
*===============================================================================
* Read the event counts by communication type and declare the quarterly
* time variable.
import excel $datpath\Oral_Frequency.xlsx, sheet("Oral") firstrow clear
keep time2 Article Interview Press Speech
rename time2 time
tsset time
format time %tq

* One line per communication type, told apart by line pattern and grey shade.
twoway	(line Article   time, lwidth(0.3) lpattern(dash)     lcolor(gs9))	///
	(line Interview time, lwidth(0.3) lpattern(solid)    lcolor(gs6))	///
	(line Press     time, lwidth(0.3) lpattern(longdash) lcolor(gs3))	///
	(line Speech    time, lwidth(0.3) lpattern(dash_dot) lcolor(gs1)),	///
	ylabel(, nogrid labsize(3) tposition(inside))	///
	ytitle("Number of events", axis(1))	///
	tlabel(172(8)236, format(%ty) labsize(3) tposition(inside))	///
	ttick(172(4)238, tposition(inside))	///
	ttitle("")	///
	tlabel(172 "2003" 180 "2005" 188 "2007" 196 "2009" 204 "2011" 212 "2013" 220 "2015" 228 "2017" 236 "2019")	///
	legend(order(1 "Article" 2 "Interview" 3 "Press" 4 "Speech"))	///
	legend(bmargin(tiny) rows(1) position(6))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph export $figpath\Fig_2.pdf, replace


*===============================================================================
* FIGURE 3 Full-sample estimation of the PBC communication indices for predicting GDP and CPI from the repetition and inclusion of words.
*===============================================================================
*-------------------------------------
*	Panel A: GDP growth rate (GDP)
*-------------------------------------
* Import GDP and its two communication indices (columns A:C of the sheet).
* NOTE(review): the quarterly gdp series is presumably stored in the first
* 66 rows of the 198-row sheet -- confirm against the workbook.
import excel $datpath\PCIs_Fullsample.xlsx, sheet("fullsample") firstrow cellrange(A1:C199) clear
gen yq = tq(2003q1)+_n-1
format yq %tq
order yq gdp
tsset yq

** Convert monthly z+ z0 to quarterly z+ z0 (mean of each block of 3 months)
local vars "gdp"
foreach var in `vars' {
	* Monthly indices as a 2-column matrix: column 1 = z+, column 2 = z0
	cap mat drop `var'_zp `var'_zz mon_`var'
	mkmat `var'_zp `var'_zz, nomis
	mat mon_`var' = [`var'_zp, `var'_zz]
	cap mat drop q`var'_zp q`var'_zz
	* Rows 1-3, 4-6, ..., 196-198 give 66 quarterly averages
	forval i = 1(3)198 {
		local j1 = `i'
		local j2 = `i' + 1
		local j3 = `i' + 2
		local qzp = (mon_`var'[`j1',1] + mon_`var'[`j2',1] + mon_`var'[`j3',1])/3
		local qzz = (mon_`var'[`j1',2] + mon_`var'[`j2',2] + mon_`var'[`j3',2])/3
		mat q`var'_zp = (nullmat(q`var'_zp) \ `qzp')
		mat q`var'_zz = (nullmat(q`var'_zz) \ `qzz')
	}
	cap mat drop qua_`var'
	mat qua_`var' = [q`var'_zp, q`var'_zz]
	mat coln qua_`var' = q`var'_zp q`var'_zz
	* Remove only the two columns svmat is about to create, so a stale copy
	* cannot make svmat fail and no unrelated variable is touched
	cap drop q`var'_zp q`var'_zz
	svmat qua_`var', names(col)
}
* Keep the 66 quarterly observations
drop if _n>66

** Plot (a)
* Bars: quarterly z+ index on axis 1; line: GDP growth on axis 2.
* The legend uses {it:z} with no space after the brace, matching the axis
* title, so that the tag is read as italic markup.
twoway bar qgdp_zp yq, yaxis(1) barw(0.6) lw(0) col(gs10)  ///
	|| line gdp yq, yaxis(2) lw(0.6) lp(solid) lc(gs4)	  ///
	ylabel(-.4(.2).2, axis(1) nogrid labs(3) tp(i)) ytitle("{it:z}{sup:+}{sub:GDP}", axis(1)) ///
	ylabel(4(4)16, axis(2) nogrid labs(3) tp(i)) ytitle("GDP", axis(2)) ///
	tlabel(2003q1(8)2019q2, format(%ty) labs(3) tp(i)) ttick(2003q1(4)2019q2, tp(i)) ttitle("") 	///
	tlabel(172 "2003" 180 "2005" 188 "2007" 196 "2009" 204 "2011" 212 "2013" 220 "2015" 228 "2017" 236 "2019") ///
	legend(order(2 "Year-on-year GDP growth rate" 1 "Quarterly {it:z}{sup:+}{sub:GDP}")) ///
	legend(bm(tiny) row(1) pos(6) holes(2)) ///
	graphregion(fcolor(white)) ///
	scheme(lean2)
graph export $figpath\Fig_3A_a.pdf, replace

** Plot (b)
* Two stacked panels of the z0 index (bars, axis 2) against GDP growth
* (line, axis 1). The legend text uses {it:z} with no space after the brace
* so that the tag is read as italic markup.

* Upper panel: all quarters in memory
twoway bar qgdp_zz yq, yaxis(2) barw(0.6) lw(0) col(gs10)  ///
	|| line gdp yq, yaxis(1) lw(0.6) lp(solid) lc(gs4)	  ///
	ylabel(-1(.4).2, axis(2) nogrid labs(3) tp(i)) ytitle("{it:z}{sup:0}{sub:GDP}", axis(2)) ///
	ylabel(6(3)15, axis(1) nogrid labs(3) tp(i)) ytitle("GDP", axis(1))	///
	tlabel(188(8)236, format(%ty)  tp(i)) ttick(188(4)236, tp(i)) ttitle("") 	///
	tlabel(172 "2003" 180 "2005" 188 "2007" 196 "2009" 204 "2011" 212 "2013" 220 "2015" 228 "2017" 236 "2019") ///
	legend(order(2 "Year-on-year GDP growth rate" 1 "Quarterly {it:z}{sup:0}{sub:GDP}")) ///
	legend(bm(tiny) row(1) pos(6) holes(2)) ///
	title("Full-sample (2003:Q1 - 2019:Q2)", size(5) ring(100))	///
	graphregion(fcolor(white)) ///
	scheme(lean2)
graph save $figpath\Fig_3A_b1, replace

* Lower panel: first 52 quarters only (2003:Q1 - 2015:Q4)
twoway bar qgdp_zz yq if _n<=52, yaxis(2) barw(0.6) lw(0) col(gs10)  ///
	|| line gdp yq if _n<=52, yaxis(1) lw(0.6) lp(solid) lc(gs4)	///
	ylabel(-.2(.1).1, axis(2) nogrid labs(3) tp(i)) ytitle("{it:z}{sup:0}{sub:GDP}", axis(2)) ///
	ylabel(6(3)15, axis(1) nogrid labs(3) tp(i)) ytitle("GDP", axis(1))	///
	tlabel(188(8)223, format(%ty)  tp(i)) ttick(188(4)223, tp(i)) ttitle("") 	///
	tlabel(172 "2003" 180 "2005" 188 "2007" 196 "2009" 204 "2011" 212 "2013" 220 "2015") ///
	legend(order(2 "Year-on-year GDP growth rate" 1 "Quarterly {it:z}{sup:0}{sub:GDP}")) ///
	legend(bm(tiny) row(1) pos(6) holes(2)) ///
	title("Subsample (2003:Q1 - 2015:Q4)", size(5) ring(100))	///
	graphregion(fcolor(white)) ///
	scheme(lean2)
graph save $figpath\Fig_3A_b2, replace

* Combine both panels with a single legend taken from the upper panel.
* The intermediate .gph files are removed; the combined graph stays in
* memory and is what graph export writes.
grc1leg $figpath\Fig_3A_b1.gph $figpath\Fig_3A_b2.gph, row(2) ///
	iscale(0.7) imargin(10 10 5 0) legend($figpath\Fig_3A_b1.gph) pos(6)	///
	scheme(lean2)
erase $figpath\Fig_3A_b1.gph
erase $figpath\Fig_3A_b2.gph
graph export $figpath\Fig_3A_b.pdf, replace


*------------------------------------------
*	Panel B: Consumer price index (CPI)
*------------------------------------------
* Monthly CPI and its communication indices (columns I:L of the sheet),
* indexed by a monthly date starting in 2003m1.
import excel $datpath\PCIs_Fullsample.xlsx, sheet("fullsample") firstrow cellrange(I1:L199) clear
generate ym = tm(2003m1) + _n - 1
format ym %tm
order ym cpi
tsset ym

** Plot (a): z+ index as bars (axis 2) behind the CPI line (axis 1)
twoway	(bar cpi_zp ym, yaxis(2) barwidth(0.6) lwidth(0) color(gs10))	///
	(line cpi ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs4)),	///
	ylabel(98(4)110, nogrid axis(1) labsize(3) tposition(inside))	///
	ytitle("CPI", axis(1))	///
	ylabel(-.5(.3).4, axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:+}{sub:CPI}", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	legend(order(2 "Consumer price index (CPI)" 1 "{it:z}{sup:+}{sub:CPI}"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph export $figpath\Fig_3B_a.pdf, replace

** Plot (b): z0 index as bars (axis 2) behind the CPI line (axis 1)
twoway	(bar cpi_zz ym, yaxis(2) barwidth(0.6) lwidth(0) color(gs10))	///
	(line cpi ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs4)),	///
	ylabel(98(4)110, axis(1) nogrid labsize(3) tposition(inside))	///
	ytitle("CPI", axis(1))	///
	ylabel(-.6(.4).6, axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:0}{sub:CPI}", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	legend(order(2 "Consumer price index (CPI)" 1 "{it:z}{sup:0}{sub:CPI}"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph export $figpath\Fig_3B_b.pdf, replace


*===============================================================================
* FIGURE 4 Correlation between target variables and PBC communication indices.
*===============================================================================
* Load all columns of the sheet and index the rows by a monthly date
* starting in 2003m1.
import excel $datpath\PCIs_Fullsample.xlsx, sheet("fullsample") firstrow clear
gen ym = tm(2003m1)+_n-1
format ym %tm
order ym gdp
tsset ym

** Convert monthly z+ z0 to quarterly z+ z0 (mean of each block of 3 months)
* gdp has to be in this list: the quarterly pwcorr further down reads
* qgdp_zp and qgdp_zz, and nothing else creates them after the import above.
* NOTE(review): assumes the workbook does not already ship q*_zp/q*_zz
* columns; if it does, they are replaced by the averages computed here.
local vars "gdp iva fai cpi m2 ppi"
foreach var in `vars' {
	* Monthly indices as a 2-column matrix: column 1 = z+, column 2 = z0
	cap mat drop `var'_zp `var'_zz mon_`var'
	mkmat `var'_zp `var'_zz, nomis
	mat mon_`var' = [`var'_zp, `var'_zz]
	cap mat drop q`var'_zp q`var'_zz
	* Rows 1-3, 4-6, ..., 196-198 give 66 quarterly averages
	forval i = 1(3)198 {
		local j1 = `i'
		local j2 = `i' + 1
		local j3 = `i' + 2
		local qzp = (mon_`var'[`j1',1] + mon_`var'[`j2',1] + mon_`var'[`j3',1])/3
		local qzz = (mon_`var'[`j1',2] + mon_`var'[`j2',2] + mon_`var'[`j3',2])/3
		mat q`var'_zp = (nullmat(q`var'_zp) \ `qzp')
		mat q`var'_zz = (nullmat(q`var'_zz) \ `qzz')
	}
	cap mat drop qua_`var'
	mat qua_`var' = [q`var'_zp, q`var'_zz]
	mat coln qua_`var' = q`var'_zp q`var'_zz
	* Drop only this variable's quarterly columns; the quarterly columns
	* built in earlier iterations must survive for the pwcorr below
	cap drop q`var'_zp q`var'_zz
	svmat qua_`var', names(col)
}

** Correlation coefficient (monthly indicators first: IVA, FAI, CPI, M2, PPI)
local vars "iva fai cpi m2 ppi"
foreach var in `vars' {
	* Clear old results first so r(C) is read right after pwcorr produces it
	cap mat drop Corr_`var' corr_`var'
	pwcorr `var' gdp_zp gdp_zz iva_zp iva_zz fai_zp fai_zz cpi_zp cpi_zz m2_zp m2_zz ppi_zp ppi_zz, star(.05)
	mat Corr_`var' = r(C)
	* First column, rows 2-13: correlation of the target with each index
	mat corr_`var' = Corr_`var'[2..13,1]
}

** Correlation coefficient (quarterly indicators first: GDP)
pwcorr gdp qgdp_zp qgdp_zz qiva_zp qiva_zz qfai_zp qfai_zz qcpi_zp qcpi_zz qm2_zp qm2_zz qppi_zp qppi_zz, star(.05)	// quarterly indicators
mat Corr_gdp = r(C)
mat corr_gdp = Corr_gdp[2..13,1]

* Assemble the 12 x 6 matrix: one row per index, one column per target
cap mat drop Corr
mat Corr = [corr_gdp, corr_iva, corr_fai, corr_cpi, corr_m2, corr_ppi]	// Corr matrix
mat rown Corr = z+_GDP z0_GDP z+_IVA z0_IVA z+_FAI z0_FAI z+_CPI z0_CPI z+_M2 z0_M2 z+_PPI z0_PPI
mat coln Corr = GDP IVA FAI CPI M2 PPI
mat list Corr	// correlation coefficient matrix

* Plot the matrix Corr with heatplot and export the result as Fig_4.pdf.
* The ylabel() list replaces the 12 row positions with formatted index names
* (italic z, superscript +/0, subscript target); the x axis keeps the
* matrix column names.
* NOTE(review): values() presumably prints each coefficient inside its cell
* and color(spmap, greys) selects a grey palette -- confirm against the
* heatplot help file.
* This graph uses scheme(lean1), whereas the other figures use lean2.
heatplot Corr, values(format(%9.2f) mlabs(3) mlabc(white)) ///
	color(spmap, greys) ramp(r labels(@min 0 @max) f(%9.2f)) ///
	yscale(noextend) xscale(noextend) ///
	ylabel(1 "{it:z}{sup:+}{sub:GDP}" 2 "{it:z}{sup:0}{sub:GDP}" ///
		   3 "{it:z}{sup:+}{sub:IVA}" 4 "{it:z}{sup:0}{sub:IVA}" ///
		   5 "{it:z}{sup:+}{sub:FAI}" 6 "{it:z}{sup:0}{sub:FAI}" ///
		   7 "{it:z}{sup:+}{sub:CPI}" 8 "{it:z}{sup:0}{sub:CPI}" ///
		   9 "{it:z}{sup:+}{sub:M2}" 10 "{it:z}{sup:0}{sub:M2}" ///
		   11 "{it:z}{sup:+}{sub:PPI}" 12 "{it:z}{sup:0}{sub:PPI}", labs(3) tp(i)) ///
	xlabel(, labs(3) tp(i)) ///	   
	plotregion(lc(white)) aspectratio(1) scheme(lean1)	
graph export $figpath\Fig_4.pdf, replace


*===============================================================================
* FIGURE 5 Full-sample and real-time estimations of PBC communication indices for predicting GDP and CPI.
*===============================================================================
*-------------------------------------
*	Panel A: GDP growth rate (GDP)
*-------------------------------------
* GDP indices from sheet "gdp" (columns B:E). Series ending in f are plotted
* as full-sample estimates and series ending in r as real-time estimates.
import excel $datpath\PCIs_Fullsample_Realtime.xlsx, sheet("gdp") cellrange(B1:E199) firstrow clear
generate ym = tm(2003m1) + _n - 1
format ym %tm
tsset ym

** Moving average: five lags plus the current month; the first five
** observations are then set to missing
foreach idx in zpf zzf zpr zzr {
	tssmooth ma MA_gdp_`idx' = gdp_`idx', window(5 1)
	replace MA_gdp_`idx' = . in 1/5
}

** Plot (a): z+ index, full-sample (axis 1) against real-time (axis 2)
correlate MA_gdp_zpf MA_gdp_zpr
local rho : display %3.2f r(rho)
twoway	(line MA_gdp_zpf ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs5))	///
	(line MA_gdp_zpr ym, yaxis(2) lwidth(0.6) lpattern(dash) lcolor(gs5)),	///
	ylabel(-.3(.2).3, nogrid axis(1) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:+}{sub:GDP} (full-sample)", axis(1))	///
	ylabel(-.4(.2).2, nogrid axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:+}{sub:GDP} (real-time)", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	xline(600, lcolor(gs13) lpattern(dash) lwidth(0.3))	///
	text(.3 696 "Corr = `rho'", placement(c) size(4) box lcolor(gs1) bcolor(gs16))	///
	title("(a) {it:z}{sup:+}{sub:GDP} (Word-repetition)", size(5) ring(100))	///
	legend(order(1 "Full-sample (6-month MA)" 2 "Real-time (6-month MA)"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph save $figpath\Fig_5A_1, replace

** Plot (b): z0 index, full-sample (axis 1) against real-time (axis 2)
correlate MA_gdp_zzf MA_gdp_zzr
local rho : display %3.2f r(rho)
twoway	(line MA_gdp_zzf ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs5))	///
	(line MA_gdp_zzr ym, yaxis(2) lwidth(0.6) lpattern(dash) lcolor(gs5)),	///
	ylabel(-.6(.3).3, nogrid axis(1) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:0}{sub:GDP} (full-sample)", axis(1))	///
	ylabel(-2(1)1, nogrid axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:0}{sub:GDP} (real-time)", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	xline(600, lcolor(gs13) lpattern(dash) lwidth(0.3))	///
	text(.3 696 "Corr = `rho'", placement(c) size(4) box lcolor(gs1) bcolor(gs16))	///
	title("(b) {it:z}{sup:0}{sub:GDP} (Word-inclusion)", size(5) ring(100))	///
	legend(order(1 "Full-sample (6-month MA)" 2 "Real-time (6-month MA)"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph save $figpath\Fig_5A_2, replace

* Stack both panels under one legend taken from the first panel, delete
* the intermediate .gph files, and export the combined graph in memory.
grc1leg $figpath\Fig_5A_1.gph $figpath\Fig_5A_2.gph, rows(2)	///
	iscale(0.7) imargin(10 10 5 0)	///
	legendfrom($figpath\Fig_5A_1.gph) position(6)	///
	scheme(lean2)
erase $figpath\Fig_5A_1.gph
erase $figpath\Fig_5A_2.gph
graph export $figpath\Fig_5A.pdf, replace


*------------------------------------------
*	Panel B: Consumer price index (CPI)
*------------------------------------------
* CPI indices from sheet "cpi" (columns B:E). Series ending in f are plotted
* as full-sample estimates and series ending in r as real-time estimates.
import excel $datpath\PCIs_Fullsample_Realtime.xlsx, sheet("cpi") cellrange(B1:E199) firstrow clear
generate ym = tm(2003m1) + _n - 1
format ym %tm
tsset ym

** Moving average: five lags plus the current month; the first five
** observations are then set to missing
foreach idx in zpf zzf zpr zzr {
	tssmooth ma MA_cpi_`idx' = cpi_`idx', window(5 1)
	replace MA_cpi_`idx' = . in 1/5
}

** Plot (a): z+ index, full-sample (axis 1) against real-time (axis 2)
correlate MA_cpi_zpf MA_cpi_zpr
local rho : display %3.2f r(rho)
twoway	(line MA_cpi_zpf ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs5))	///
	(line MA_cpi_zpr ym, yaxis(2) lwidth(0.6) lpattern(dash) lcolor(gs5)),	///
	ylabel(-.3(.2).3, nogrid axis(1) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:+}{sub:CPI} (full-sample)", axis(1))	///
	ylabel(-.3(.2).3, nogrid axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:+}{sub:CPI} (real-time)", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	xline(600, lcolor(gs13) lpattern(dash) lwidth(0.3))	///
	text(.3 696 "Corr = `rho'", placement(c) size(4) box lcolor(gs1) bcolor(gs16))	///
	title("(a) {it:z}{sup:+}{sub:CPI} (Word-repetition)", size(5) ring(100))	///
	legend(order(1 "Full-sample (6-month MA)" 2 "Real-time (6-month MA)"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph save $figpath\Fig_5B_1, replace

** Plot (b): z0 index, full-sample (axis 1) against real-time (axis 2)
correlate MA_cpi_zzf MA_cpi_zzr
local rho : display %3.2f r(rho)
twoway	(line MA_cpi_zzf ym, yaxis(1) lwidth(0.6) lpattern(solid) lcolor(gs5))	///
	(line MA_cpi_zzr ym, yaxis(2) lwidth(0.6) lpattern(dash) lcolor(gs5)),	///
	ylabel(-.6(.4).6, nogrid axis(1) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:0}{sub:CPI} (full-sample)", axis(1))	///
	ylabel(-1.2(.8)1.2, nogrid axis(2) labsize(3) tposition(inside))	///
	ytitle("{it:z}{sup:0}{sub:CPI} (real-time)", axis(2))	///
	tlabel(516(24)713, format(%tm) labsize(3) tposition(inside))	///
	ttick(516(12)713, tposition(inside))	///
	ttitle("")	///
	tlabel(516 "2003" 540 "2005" 564 "2007" 588 "2009" 612 "2011" 636 "2013" 660 "2015" 684 "2017" 708 "2019")	///
	xline(600, lcolor(gs13) lpattern(dash) lwidth(0.3))	///
	text(.6 696 "Corr = `rho'", placement(c) size(4) box lcolor(gs1) bcolor(gs16))	///
	title("(b) {it:z}{sup:0}{sub:CPI} (Word-inclusion)", size(5) ring(100))	///
	legend(order(1 "Full-sample (6-month MA)" 2 "Real-time (6-month MA)"))	///
	legend(bmargin(tiny) rows(1) position(6) holes(2))	///
	graphregion(fcolor(white))	///
	scheme(lean2)
graph save $figpath\Fig_5B_2, replace

* Stack both panels under one legend taken from the first panel, delete
* the intermediate .gph files, and export the combined graph in memory.
grc1leg $figpath\Fig_5B_1.gph $figpath\Fig_5B_2.gph, rows(2)	///
	iscale(0.7) imargin(10 10 5 0)	///
	legendfrom($figpath\Fig_5B_1.gph) position(6)	///
	scheme(lean2)
erase $figpath\Fig_5B_1.gph
erase $figpath\Fig_5B_2.gph
graph export $figpath\Fig_5B.pdf, replace