
*****************************
*** Preparing main dataset***
*****************************

* Project root. Every file reference below is built from this global, so
* pointing it at the project folder is the only machine-specific edit needed.
global path "."					/*path on your computer*/
cd "$path"

*** Prepare the dataset and variables needed
clear all
* Load the unbalanced firm-year panel (1998-2007).
* Forward slashes are used as the directory separator: Stata accepts them on
* Windows, macOS and Linux, whereas a backslash only works on Windows and can
* be misread as an escape character when it sits next to a macro reference.
use "$path/_datasets/unbalanced.1998--2007.panel.dta", clear
* Order observations by firm and, within firm, by year.
sort firm year

* ---- Harmonise the core production-function variable names -------------------
* Every rename is wrapped in -capture- so the script keeps running when a
* source variable is absent (for instance because it was already renamed).

* Output (revenue)
capture rename output Y

* Free (flexible) inputs
capture rename employment L
capture rename wage       tot_wage

* State variables
capture rename fa_net   K_net    // raw capital stock
capture rename real_cap rK       // calculated following the procedure of Brandt et al. (2012)

* Proxy variables
capture rename input M
capture rename c_dep depr        // used to calculate investment

* ---- Value added rebuilt from output and inputs (data are missing in 2004) ---
capture generate va_new  = Y - M
capture generate va_VAT  = Y - M + VATaxPayable    // follows Brandt et al. (2012)
capture generate VATrate = VATaxPayable / Y

* Inspect the implied VAT rate: overall distribution, then the mean by year.
summarize VATrate, detail
tabstat VATrate, by(year)

* ---- Deflated (real) series ---------------------------------------------------
* Real turnover, real intermediate input and real value added, using the
* 4-digit sector-specific deflators from Brandt et al. (2012).
* Each series is nominal value / deflator * 100.
capture generate rY      = Y / OutputDefl * 100
capture generate rM      = M / InputDefl * 100
capture generate rVAT    = VATaxPayable / OutputDefl * 100
capture generate rVA     = rY - rM
capture generate rVA_VAT = rY - rM + rVAT

* ---- Real investment ----------------------------------------------------------
* invest is the lead of fa_original minus its current value.
* NOTE(review): F. is a time-series operator and needs the data to be
* tsset/xtset. This script does not declare panel settings before this line,
* so it relies on the loaded .dta carrying them. If it does not, -capture-
* hides the error and invest / rinvest are never created. TODO confirm.
capture generate invest  = F.fa_original - fa_original
capture generate rdepr   = depr / CapDefl * 100
capture generate rinvest = invest / CapDefl * 100 + rdepr

* ---- Natural logs -------------------------------------------------------------
* Creates ln<name> for every variable in the list. -capture- skips a variable
* whose log already exists.
* NOTE(review): the list names K, but the renames in this script only produce
* K_net and rK; presumably K resolves through variable abbreviation or already
* exists in the raw data. Verify against the dataset.
local list Y rY va_new L tot_wage K M rK rM rVA rdepr invest rinvest
foreach v of varlist `list' {
	capture generate ln`v' = ln(`v')
}

* ---- Give the remaining raw variables descriptive names ----------------------
* -capture- lets each rename fail quietly when the source name is not present.

* Identifiers
capture rename firm           NewID
capture rename id             LegalEntityCode

* Paid-up capital by source of funds
capture rename e_foreign      ForeignCapitalFund
capture rename e_collective   CollectiveOwnedCapitalFund
capture rename e_HMT          HKMTCapitalFund
capture rename e_legal_person LegalPersonCapitalFund
capture rename e_individual   PvtCapitalFund
capture rename e_state        StateOwnedCapitalFund

* Firm descriptors
capture rename type           RegistrationType
capture rename name           EnterpriseName
capture rename legal_person   LegalRepresentativeName
capture rename bdat           YearOpened

* Generate Capital Shares
* PaidUpCapital is the sum of the six capital funds. If any component is
* missing the sum is missing, and so is every share derived from it.
gen PaidUpCapital = StateOwnedCapitalFund + CollectiveOwnedCapitalFund + PvtCapitalFund ///
					+ LegalPersonCapitalFund + HKMTCapitalFund + ForeignCapitalFund
* "New" private capital pools individual and legal-person funds.
gen PvtNewFund = PvtCapitalFund + LegalPersonCapitalFund

* Each share is the fund divided by total paid-up capital
* (missing when PaidUpCapital is zero or missing).
gen SOEShare = StateOwnedCapitalFund/PaidUpCapital
gen PvtShare = PvtCapitalFund/PaidUpCapital
gen PvtNewShare = PvtNewFund/PaidUpCapital
gen CollectShare = CollectiveOwnedCapitalFund/PaidUpCapital
gen OECDShare = ForeignCapitalFund/PaidUpCapital
gen HKMTShare = HKMTCapitalFund/PaidUpCapital
gen FSTotal = OECDShare + HKMTShare

* 0/1 indicators for a strictly positive foreign / HKMT share.
* Stata treats missing as larger than any number, so a bare (share>0) would
* evaluate to 1 when the share is missing. The -if !missing()- qualifier
* leaves the indicator missing in that case instead of wrongly flagging the
* firm as foreign-invested.
* The misspelt name OECD_idicator is kept on purpose so that any code that
* already refers to this variable keeps working.
gen OECD_idicator = (OECDShare>0) if !missing(OECDShare)
gen HKMT_indicator = (HKMTShare>0) if !missing(HKMTShare)

* Log shares; ln() of a zero or missing share is missing.
gen lnOECDShare = ln(OECDShare)
gen lnHKMTShare = ln(HKMTShare)
gen lnFSTotal = ln(FSTotal)

* Generate revenue share of intermediate inputs
gen InputShare = M/Y
gen lnInputShare = ln(InputShare)

********************************************************************************
* Attach descriptive value labels to the 2-digit industry codes

* Convert the industry code to numeric so that value labels can be attached.
destring cic_2_digit, replace

* The label definition is one long command; the delimiter is switched to ";"
* for its duration instead of using line-continuation markers, and switched
* back to carriage return immediately afterwards.
#delimit ;
label define cic_2_digit
	06	"Coal"
	07	"Petro & gas"
	08	"Min ferrous metal"
	09	"Min non-ferrous metal"
	10	"Min&Pro nonmetal"
	11	"Min other"
	13	"Pro food from agr"
	14	"M foods"
	15	"M beverage"
	16	"M tobacco"
	17	"M textile"
	18	"M textile apparel footware"
	19	"M leather fur"
	20	"Pro timbers M wood"
	21	"M furniture"
	22	"M paper products"
	23	"Printing"
	24	"M culture education"
	25	"Pro petroleum nucleus fuel"
	26	"M chemical"
	27	"M medicines"
	28	"M chemical fiber"
	29	"M rubber"
	30	"M plastic"
	31	"M non-metal mineral"
	32	"M&Pro ferrous metals"
	33	"M&Pro non-ferrous metals"
	34	"M metal"
	35	"M gen machine"
	36	"M spe machine"
	37	"M transport"
	39	"M elec. machine"
	40	"M commu.&computer"
	41	"M measure instr"
	42	"M artwork"
	43	"Recyc&dis. waste"
	44	"Supply elec.&heat power"
	45	"Supply gas"
	46	"Supply water"
;
#delimit cr

* Bind the label set to the variable of the same name.
label values cic_2_digit cic_2_digit

* Adding value label to RegistrationType
* The text in parentheses at the end of each label is the broad ownership
* group the registration code belongs to.

label define RegistrationType ///
110  "State-owned(SOE)" ///
120  "Collective-owned(Collective)" ///
130  "Equity Cooperative(JV)" ///
141  "State-owned joint-operation(SOE)" ///
142  "Collective-owned joint-operation(Collective)" ///
143  "State-collective joint-operation(SOE)" ///
149  "Other joint venture(JV)" ///
151  "State sole funded corporations(SOE)" ///
159  "Other limited liability(LTD)" ///
160  "Share-holding corporations(LTD)" ///
171  "Private sole investment(Private)" ///
172  "Private partnership(Private)" ///
173  "Private limited liability corporation(Private)" ///
174  "Private share-holding corporations(Private)" ///
190  "Other domestic enterprises(Other)" ///
210  "Joint venture with HKMT(HKMT)" ///
220  "Cooperate with HKMT(HKMT)" ///
230  "Sole HKMT funded(HKMT)" ///
240  "HKMT Ltd.(HKMT)" ///
310  "Sino-foreign joint venture(FIE)" ///
320  "Sino-foreign cooperative(FIE)" ///
330  "Sole foreign funded enterprise(FIE)" ///
340  "Foreign invested Ltd.(FIE)"

* Make sure the code is numeric before attaching the value label
* (-capture- because it may already be numeric).
cap destring RegistrationType, replace
label values RegistrationType RegistrationType

* Bring in the lookup table keyed on RegistrationType. The string variables
* Type and Form used further down are expected to come from this file.
* Forward slashes keep the path usable on every operating system.
cap drop _merge
merge m:1 RegistrationType using "$path/_datasets/derived/RegistrationType.dta"

* Keep matched observations only.
*  _merge==1: firm-years whose registration code is not in the lookup
*             (author's note: 7 observations with RegistrationType==1 in the
*             raw data, likely due to misreporting).
*  _merge==2: lookup codes that no firm uses. These would otherwise stay in
*             memory as empty observations carrying no firm data.
keep if _merge==3
drop _merge

* ---- Ownership category derived from the registration-type group (Type) ------
* Mapping: SOE -> 1, Collective -> 2, Private or JV -> 3, HKMT -> 4, FIE -> 5,
* LTD or Other -> 6. Observations with any other Type value stay missing.

capture generate Ownership = .
replace Ownership = 1 if Type == "SOE"
replace Ownership = 2 if Type == "Collective"
replace Ownership = 3 if inlist(Type, "Private", "JV")
replace Ownership = 4 if Type == "HKMT"
replace Ownership = 5 if Type == "FIE"
replace Ownership = 6 if inlist(Type, "LTD", "Other")

* Build the value label one category at a time, then attach it.
label define Ownership 1 "SOE"
label define Ownership 2 "Hybrid/Collective", add
label define Ownership 3 "Private", add
label define Ownership 4 "Hong Kong, Macau, Taiwan", add
label define Ownership 5 "Foreign", add
label define Ownership 6 "Other Domestic", add

label values Ownership Ownership


* Reclassify Ownership further for Ownership==6 ("Other Domestic"); most of
* them are private domestic firms.
* NOTE(review): firms whose shares are all missing end this section with a
* missing Ownership value. They are not physically dropped here.

* Step 1: threshold rules on the capital shares.
* Stata treats missing as larger than any number, so a bare "share > 0.5" is
* true when the share is missing. Without the !missing() guards every
* Ownership==6 firm with undefined shares would be classified as SOE by the
* first rule.
* The shares are stored as float by default, while the literal 0.9 is a
* double that is slightly larger than the float closest to 0.9. float(0.9) is
* used so that a share of exactly 90% satisfies ">=". 0.5 is exactly
* representable and needs no such care.
replace Ownership=1 if Ownership==6 & SOEShare>0.5 & !missing(SOEShare)
replace Ownership=2 if Ownership==6 & CollectShare>=float(0.9) & !missing(CollectShare)
replace Ownership=3 if Ownership==6 & PvtShare>=float(0.9) & !missing(PvtShare)
replace Ownership=4 if Ownership==6 & HKMTShare>0.5 & !missing(HKMTShare)
replace Ownership=5 if Ownership==6 & OECDShare>0.5 & !missing(OECDShare)

* Step 2: no state, collective, HKMT or foreign capital at all -> private.
replace Ownership=3 if Ownership==6 & CollectiveOwnedCapitalFund ==0 & StateOwnedCapitalFund ==0 & HKMTCapitalFund==0 & ForeignCapitalFund ==0
count if Ownership==6 // How many firms left?

* Step 3: assign the remaining firms to the category with the largest share.
* Share1..Share5 are numbered to match the Ownership codes 1..5; the private
* share (Share3) uses individual plus legal-person funds.
gen Share1 = StateOwnedCapitalFund/PaidUpCapital
gen Share2 = CollectiveOwnedCapitalFund/PaidUpCapital
gen Share3 = PvtNewFund/PaidUpCapital
gen Share4 = HKMTCapitalFund/PaidUpCapital
gen Share5 = ForeignCapitalFund/PaidUpCapital


* pos holds the index of the largest share seen so far, max its value.
* Missing shares are skipped. Because pos is updated with ">=", a tie is
* resolved in favour of the higher-numbered category.
gen pos = .
gen max = -1

forval j = 1/5 {
		replace pos = `j' if Share`j'>=max & Share`j' <. & Ownership==6
		replace max = Share`j' if Share`j'>max & Share`j'<. & Ownership==6
	}

* pos stays missing when all five shares are missing, which makes Ownership
* missing for those firms.
replace Ownership = pos if Ownership==6

* Remove only the five helper shares created above. An explicit list is used
* instead of a wildcard so that no other variable whose name happens to start
* with "Share" is removed. pos and max are left in the data, as before.
drop Share1 Share2 Share3 Share4 Share5

* ---- Diagnostics: do the foreign categories agree with the capital shares? ---
* The threshold is 25% for both types; most firms satisfy it, but there are
* irregularities, especially among IJVs.
*
* Counts noted by the original author when this was run:
*                               HKMT (Ownership==4)   OECD (Ownership==5)
*   total                            217,928               198,677
*   share <= 0                        27,515                19,782
*   share <= 0.25                     43,390                34,200
*   share <= 0.5                      79,010                71,084
*   0.5 < share < 1                   27,280                34,329
*   share == 1                       111,631                93,261

* The same six checks are run for each category; code walks through the
* matching Ownership value (4 for HKMTShare, then 5 for OECDShare).
local sharevars HKMTShare OECDShare
local code = 4
foreach s of local sharevars {
	summarize `s' if Ownership==`code', detail
	count if Ownership==`code' & `s'<=0
	count if Ownership==`code' & `s'<=0.25
	count if Ownership==`code' & `s'<=0.5
	count if Ownership==`code' & `s'>0.5 & `s'<1
	count if Ownership==`code' & `s'==1
	local ++code
}

* ---- Ownership format: SOE, Domestic, IJV or WFOE -----------------------------
* Derived from the string variable Form. The HKMT and OECD variants of IJV and
* of WFOE are pooled into one code each. Any other Form value stays missing.
capture generate OwnerForm = .
replace OwnerForm = 10 if Form == "SOE"
replace OwnerForm = 20 if Form == "Domestic"
replace OwnerForm = 30 if inlist(Form, "IJV-HKMT", "IJV-OECD")
replace OwnerForm = 40 if inlist(Form, "WFOE-HKMT", "WFOE-OECD")

* Build the value label one category at a time, then attach it.
label define OwnerForm 10 "SOE"
label define OwnerForm 20 "Domestic", add
label define OwnerForm 30 "IJV", add
label define OwnerForm 40 "WFOE", add

label values OwnerForm OwnerForm

* Drop observations with no 2-digit industry code.
* missing() is used rather than "==." so that extended missing values
* (.a-.z) are removed as well.
* Author's note: this dropped 169,865 observations.
drop if missing(cic_2_digit)

* Save basic dataset.
* The path is relative to the current working directory. Forward slashes are
* used because they work on every operating system; a backslash is only
* understood on Windows.
save "./_datasets/TFP_china.dta", replace







