|
发表于 2017-9-12 09:08:16
|
显示全部楼层
一段工业企业数据库的匹配样本代码,亲测可用,你可以参考一下
- * Session setup: switch off output paging, start from an empty session and
- * move to the project folder that receives every intermediate file.
- set more off
- clear all
- global PATH "/Volumes/TOSHIBA EXT/Projects/NBS/China Industry Business Performance Data/Match Over Years"
- cd "${PATH}"
- ******************************************************************************
- * Part 1
- * Before running this do-file, original_1998.dta ~ original_2007.dta must
- * already have been generated, which means that do-files 1998.do ~ 2007.do
- * have already been run.
- *
- * Generate a id variable (id_in_source) for further combining data set
- * after match over years.
- *
- * Only keep match variables and id_in_source for the next steps
- *****************************************************************************
- * Build one slim matching file per year (m1998.dta ~ m2007.dta): harmonise
- * variable names, keep only the matching variables plus id_in_source, and
- * suffix every variable with its year.
- forvalues i = 1998/2007{
- disp "File `i'"
- use `"../original_`i'.dta"',clear
- * id_in_source identifies the record inside the yearly source file. It is
- * created only when the source file does not already carry it, so an
- * existing id is never overwritten.
- capture confirm variable id_in_source
- if _rc {
- gen long id_in_source = _n
- }
- * For 2003 the town variable is created from address.
- if `i'==2003{
- gen town = address
- }
- * Industry code: adjusted code first, numeric value of industry_code as fallback.
- * (An alternative that switched to cic03 from 2003 on was disabled by the author.)
- gen cic = cic_adj
- replace cic = real(industry_code) if cic == .
- * Branch on the loop year and not on the variable year: the if command
- * (unlike the if qualifier) evaluates only the first observation, so a
- * missing or wrong year in row 1 would silently select the wrong branch.
- * NOTE(review): assumes year inside each original file equals the file year - confirm.
- if `i'<2004{
- gen revenue = sales_revenue
- }
- else{
- gen revenue = operating_revenue
- }
- gen profit = total_profit
- * Only the 1999 and 2002 files need employment built from staff.
- if `i'==1999 | `i'==2002{
- gen employment = staff
- }
- keep id_in_source firm_id firm_name legal_person town province ///
- telephone zip product1 founding_year cic region_code revenue ///
- employment profit
- * force converts non-numeric entries to missing instead of aborting
- destring founding_year revenue employment profit,replace force
- * cic becomes a zero-padded string so later steps can slice it with substr()
- tostring cic,replace format(%04.0f)
- rename firm_id id
- rename firm_name name
- rename founding_year bdat
- rename region_code dq
- rename product1 product1_
- rename telephone phone
- * suffix every variable with the year so files of different years can be combined
- foreach var of varlist *{
- rename `var' `var'`i'
- }
- compress
- saveold m`i'.dta,replace
- }
- * Sanity check: load each yearly matching file and print its short description.
- forvalues yr = 1998/2007 {
- use "m`yr'.dta", clear
- describe, short
- }
- *******************************************************************************
- * Part 2 Match Firms over years
- * current directory should contain m1998.dta ~ m2007.dta after Part 1
- *******************************************************************************
- **************************************
- * *
- *STAGE 1: Match Two Consecutive Years*
- * *
- **************************************
- *change lower case to upper case in "firm id"
- * Upper-case the firm ID of every year and write the result to m<year>.10.dta,
- * so that the ID match is not affected by letter case.
- forvalues yr = 1998/2007 {
- use "m`yr'.dta", clear
- replace id`yr' = strupper(id`yr')
- compress
- saveold "m`yr'.10.dta", replace
- }
- * Sanity check: print the short description of every upper-cased yearly file.
- forvalues yr = 1998/2007 {
- use "m`yr'.10.dta", clear
- describe, short
- }
- * Stage 1 driver: for every pair of consecutive years (i, j = i+1) match firms
- * in a cascade of keys - firm ID, firm name, legal person + region, phone +
- * region + industry, and a composite code (code3). At every step the records
- * whose key is duplicated within a year are set aside (duplicates_* files),
- * the unique ones are merged 1:1, and everything still unmatched (plus the
- * set-aside duplicates) is passed on to the next key.
- * Output per pair: m<i>-m<j>.dta with match_method_i_j and match_status_i_j
- * ("3" = matched, "1" = year i only, "2" = year j only).
- forval i = 1998/2006{
- local j = `i'+1
-
- **Step 10: Match by firm ID (faren daima)**
-
- *deal with duplicates of IDs (There are a few firms that have same IDs)
- disp "Step 10 "
- * year i: records whose ID occurs more than once are set aside
- use m`i'.10.dta,clear
- bysort id`i': keep if _N>1
- compress
- saveold duplicates_ID`i'.dta,replace
-
- * year i: records with a unique ID, keyed on the year-neutral variable id
- use m`i'.10.dta,clear
- bysort id`i': drop if _N>1
- rename id`i' id
- sort id
- keep *`i' id
- compress
- saveold match`i'.1.dta,replace
-
- * year j: duplicated IDs
- use m`j'.10.dta,clear
- bysort id`j': keep if _N>1
- compress
- saveold duplicates_ID`j'.dta,replace
-
- * year j: unique IDs
- use m`j'.10.dta,clear
- bysort id`j': drop if _N>1
- rename id`j' id
- keep *`j' id
- sort id
- compress
- saveold match`j'.1.dta,replace
-
- * firms present in both years under the same ID; the key is stored back as
- * both id of year i and id of year j
- use match`i'.1.dta,clear
- merge 1:1 id using match`j'.1.dta
- keep if _m==3
- gen id`i' = id
- rename id id`j'
- drop _merge
- gen match_method_`i'_`j'="ID"
- gen match_status_`i'_`j'="3"
- compress
- saveold matched_by_ID`i'_`j'.dta,replace
-
-
- **step20: match by firm names**
-
- *match those unmatched firms in previous step by firm names*
- disp "Step 20 "
- * year i pool = ID-unmatched records (_m==1) plus the duplicated-ID records;
- * from this pool, set aside the records whose name is duplicated
- use match`i'.1.dta,clear
- merge 1:1 id using match`j'.1.dta
- keep if _m==1
- rename id id`i'
- append using duplicates_ID`i'.dta
- bysort name`i': keep if _N>1
- keep *`i'
- compress
- saveold duplicates_name`i'.dta,replace
-
- * same year i pool, now keeping the records with a unique name, keyed on name
- use match`i'.1.dta,clear
- merge 1:1 id using match`j'.1.dta
- keep if _m==1
- rename id id`i'
- append using duplicates_ID`i'.dta
- bysort name`i': drop if _N>1
- rename name`i' name
- sort name
- keep *`i' name
- compress
- saveold unmatched_by_ID`i'.dta,replace
-
- * year j pool (_m==2 plus duplicated IDs): duplicated names
- use match`i'.1.dta,clear
- merge 1:1 id using match`j'.1.dta
- keep if _m==2
- rename id id`j'
- append using duplicates_ID`j'.dta
- bysort name`j': keep if _N>1
- keep *`j'
- compress
- saveold duplicates_name`j'.dta,replace
- * year j pool: unique names, keyed on name
- use match`i'.1.dta,clear
- merge 1:1 id using match`j'.1.dta
- keep if _m==2
- rename id id`j'
- append using duplicates_ID`j'.dta
- bysort name`j': drop if _N>1
- rename name`j' name
- sort name
- keep *`j' name
- compress
- saveold unmatched_by_ID`j'.dta,replace
-
- * firms matched on name
- use unmatched_by_ID`i'.dta,clear
- merge 1:1 name using unmatched_by_ID`j'.dta
- keep if _m==3
- gen name`i' = name
- rename name name`j'
- drop _m
- gen match_method_`i'_`j'="firm name"
- gen match_status_`i'_`j'="3"
- compress
- saveold matched_by_name`i'_`j'.dta,replace
-
-
-
- **step 30: match by the names of legal person representatives**
-
- *match those unmatched firms in previous steps by firm legal person representatives*
- disp "Step 30 "
- * code1 = legal person name + first 4 characters of the region code (dq).
- * NOTE(review): a blank legal person becomes "." on the year i side only (the
- * year j replacement below is commented out), so records with a blank legal
- * person get different keys in the two years and cannot match each other -
- * presumably intentional; confirm.
- * year i pool (name-unmatched plus duplicated names): duplicated code1
- use unmatched_by_ID`i'.dta,clear
- merge 1:1 name using unmatched_by_ID`j'.dta
- keep if _m == 1
- rename name name`i'
- append using duplicates_name`i'.dta
- replace legal_person`i' = "." if legal_person`i' == ""
- gen code1 = legal_person`i' + substr(dq`i',1,4)
- bysort code1: keep if _N>1
- keep *`i'
- compress
- saveold duplicates_code1_`i'.dta,replace
-
- * year i pool: unique code1
- use unmatched_by_ID`i'.dta,clear
- merge 1:1 name using unmatched_by_ID`j'.dta
- keep if _m == 1
- rename name name`i'
- append using duplicates_name`i'.dta
- replace legal_person`i' = "." if legal_person`i' == ""
- gen code1 = legal_person`i' + substr(dq`i',1,4)
- bysort code1: drop if _N>1
- sort code1
- keep code1 *`i'
- compress
- saveold unmatched_by_ID_and_name`i'.dta,replace
-
- * year j pool: duplicated code1
- use unmatched_by_ID`i'.dta,clear
- merge 1:1 name using unmatched_by_ID`j'.dta
- keep if _m == 2
- rename name name`j'
- append using duplicates_name`j'.dta
- * replace legal_person`j' = "." if legal_person`j' == ""
- gen code1 = legal_person`j' + substr(dq`j',1,4)
- bysort code1: keep if _N>1
- keep *`j'
- compress
- saveold duplicates_code1_`j'.dta,replace
-
- * year j pool: unique code1
- use unmatched_by_ID`i'.dta,clear
- merge 1:1 name using unmatched_by_ID`j'.dta
- keep if _m == 2
- rename name name`j'
- append using duplicates_name`j'.dta
- * replace legal_person`j' = "." if legal_person`j' == ""
- gen code1 = legal_person`j' + substr(dq`j',1,4)
- bysort code1: drop if _N>1
- sort code1
- keep code1 *`j'
- compress
- saveold unmatched_by_ID_and_name`j'.dta,replace
-
- * firms matched on code1
- use unmatched_by_ID_and_name`i'.dta,clear
- disp _N
- merge 1:1 code1 using unmatched_by_ID_and_name`j'.dta
- keep if _m==3
- drop _m code1
- gen match_method_`i'_`j' = "legal_person"
- gen match_status_`i'_`j' = "3"
- compress
- saveold matched_by_legalperson`i'_`j'.dta,replace
-
-
- **Step 40: match by phone number + geographic code + industry code**
-
- *match those unmatched firms in previous steps by phone number + geographic code + industry code*
- disp "Step 40 "
- * code2 = first 4 characters of dq + first 3 characters of cic + phone.
- * As in step 30, a blank phone becomes "." on the year i side only.
- * year i pool (code1-unmatched plus duplicated code1): duplicated code2
- use unmatched_by_ID_and_name`i'.dta,clear
- merge 1:1 code1 using unmatched_by_ID_and_name`j'.dta
- keep if _m==1
- drop code1
- append using duplicates_code1_`i'.dta
- replace phone`i' = "." if phone`i' == ""
- gen code2 = substr(dq`i',1,4)+substr(cic`i',1,3)+phone`i'
- bysort code2 : keep if _N>1
- keep *`i'
- compress
- saveold duplicates_code2_`i'.dta,replace
-
- * year i pool: unique code2
- use unmatched_by_ID_and_name`i'.dta,clear
- merge 1:1 code1 using unmatched_by_ID_and_name`j'.dta
- keep if _m==1
- drop code1
- append using duplicates_code1_`i'.dta
- replace phone`i' = "." if phone`i' == ""
- gen code2 = substr(dq`i',1,4)+substr(cic`i',1,3)+phone`i'
- bysort code2 : drop if _N>1
- keep code2 *`i'
- sort code2
- compress
- saveold unmatched_by_ID_and_name_and_legalperson`i'.dta,replace
-
- * year j pool: duplicated code2
- use unmatched_by_ID_and_name`i'.dta,clear
- merge 1:1 code1 using unmatched_by_ID_and_name`j'.dta
- keep if _m==2
- drop code1
- append using duplicates_code1_`j'.dta
- * replace phone`j' = "." if phone`j' == ""
- gen code2 = substr(dq`j',1,4)+substr(cic`j',1,3)+phone`j'
- bysort code2 : keep if _N>1
- keep *`j'
- compress
- saveold duplicates_code2_`j'.dta,replace
- * year j pool: unique code2
- use unmatched_by_ID_and_name`i'.dta,clear
- merge 1:1 code1 using unmatched_by_ID_and_name`j'.dta
- keep if _m==2
- drop code1
- append using duplicates_code1_`j'.dta
- * replace phone`j' = "." if phone`j' == ""
- gen code2 = substr(dq`j',1,4)+substr(cic`j',1,3)+phone`j'
- bysort code2 : drop if _N>1
- sort code2
- keep code2 *`j'
- compress
- saveold unmatched_by_ID_and_name_and_legalperson`j'.dta,replace
-
- * firms matched on code2
- use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear
- merge 1:1 code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta
- keep if _m==3
- drop _m code2
- gen match_method_`i'_`j' = "phone number"
- gen match_status_`i'_`j' = "3"
- compress
- saveold matched_by_phone`i'_`j'.dta,replace
-
-
- **step 50: match by code = founding year + geographic code + industry code+ name of town + name of main product
-
- *match those unmatched firms in previous steps by founding year + geographic code + industry code+ name of town + name of main product
- disp "Step 50 "
- * code3 = founding year + first 4 characters of dq + first 3 characters of
- * cic + town + main product. A blank product becomes "." in both years; a
- * blank town becomes "." on the year i side only (year j line commented out).
- * year i pool (code2-unmatched plus duplicated code2): duplicated code3
- use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear
- merge 1:1 code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta
- keep if _m==1
- drop code2
- append using duplicates_code2_`i'.dta
- replace town`i' = "." if town`i' == ""
- replace product1_`i' = "." if product1_`i' == ""
- gen code3 = string(bdat`i')+substr(dq`i',1,4)+substr(cic`i',1,3)+town`i'+product1_`i'
- bysort code3: keep if _N>1
- keep *`i'
- compress
- saveold duplicates_code3_`i'.dta,replace
-
- * year i pool: unique code3
- use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear
- merge 1:1 code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta
- keep if _m==1
- drop code2
- append using duplicates_code2_`i'.dta
- replace town`i' = "." if town`i' == ""
- replace product1_`i' = "." if product1_`i' == ""
- gen code3 = string(bdat`i')+substr(dq`i',1,4)+substr(cic`i',1,3)+town`i'+product1_`i'
- bysort code3: drop if _N>1
- sort code3
- keep code3 *`i'
- compress
- saveold unmatched_by_ID_and_name_and_legalperson_and_phone_`i'.dta,replace
- * year j pool: duplicated code3
- use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear
- merge 1:1 code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta
- keep if _m==2
- drop code2
- append using duplicates_code2_`j'.dta
- * replace town`j' = "." if town`j' == ""
- replace product1_`j' = "." if product1_`j' == ""
- gen code3 = string(bdat`j')+substr(dq`j',1,4)+substr(cic`j',1,3)+town`j'+product1_`j'
- bysort code3: keep if _N>1
- keep *`j'
- compress
- saveold duplicates_code3_`j'.dta,replace
- * year j pool: unique code3
- use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear
- merge 1:1 code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta
- keep if _m==2
- drop code2
- append using duplicates_code2_`j'.dta
- * replace town`j' = "." if town`j' == ""
- replace product1_`j' = "." if product1_`j' == ""
- gen code3 = string(bdat`j')+substr(dq`j',1,4)+substr(cic`j',1,3)+town`j'+product1_`j'
- bysort code3: drop if _N>1
- sort code3
- keep code3 *`j'
- compress
- saveold unmatched_by_ID_and_name_and_legalperson_and_phone_`j'.dta,replace
-
- * firms matched on code3
- use unmatched_by_ID_and_name_and_legalperson_and_phone_`i'.dta,clear
- disp _N
- merge 1:1 code3 using unmatched_by_ID_and_name_and_legalperson_and_phone_`j'.dta
- keep if _m==3
- drop _m code3
- gen match_method_`i'_`j' = "code 3"
- gen match_status_`i'_`j' = "3"
- compress
- saveold matched_by_code3_`i'_`j'.dta,replace
-
- * year i firms left unmatched after every key (status "1"), including the
- * duplicated-code3 records. The file name ends in code2 although it is
- * written after the code3 step.
- use unmatched_by_ID_and_name_and_legalperson_and_phone_`i'.dta,clear
- merge 1:1 code3 using unmatched_by_ID_and_name_and_legalperson_and_phone_`j'.dta
- keep if _m == 1
- drop _m code3
- append using duplicates_code3_`i'.dta
- gen match_method_`i'_`j' = ""
- gen match_status_`i'_`j' = "1"
- compress
- saveold unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2`i'.dta,replace
-
- * year j firms left unmatched after every key (status "2")
- use unmatched_by_ID_and_name_and_legalperson_and_phone_`i'.dta,clear
- merge 1:1 code3 using unmatched_by_ID_and_name_and_legalperson_and_phone_`j'.dta
- keep if _m == 2
- drop _m code3
- append using duplicates_code3_`j'.dta
- gen match_method_`i'_`j' = ""
- gen match_status_`i'_`j' = "2"
- compress
- saveold unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2`j'.dta,replace
-
-
- **step 60: merge the matched and unmatched files to create files of two consecutive years**
- disp "Step 60 "
- * stack the five matched sets and the two unmatched sets into one pair file
- use matched_by_ID`i'_`j'.dta,clear
- append using matched_by_name`i'_`j'.dta
- append using matched_by_legalperson`i'_`j'.dta
- append using matched_by_phone`i'_`j'.dta
- append using matched_by_code3_`i'_`j'.dta
- append using unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2`i'.dta
- append using unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2`j'.dta
- compress
- saveold m`i'-m`j'.dta,replace
- }
- * Report, for every consecutive-year pair, how many records fall under each
- * match method and each match status.
- forvalues yr = 1998/2006 {
- local nxt = `yr' + 1
- use "m`yr'-m`nxt'.dta", clear
- tabulate match_method_`yr'_`nxt'
- tabulate match_status_`yr'_`nxt'
- }
- *********************************************
- *
- *STAGE 2: Match over Three Consecutive Years
- *
- *********************************************
- * Stage 2 driver: for every triple of consecutive years (i, j = i+1, k = i+2)
- * chain the pair files i-j and j-k through year j, then try to link firms
- * of year i and year k directly (by ID, then by name) when they were not
- * chained through j. Output per triple: unbalanced.<i>-<j>-<k>.dta with
- * match_status_i_j_k describing in which of the three years a firm appears.
- * The join key named code is always id + revenue + employment + profit +
- * province of a single year, concatenated as a string.
- forvalues i = 1998/2005{
- local j = `i'+1
- local k = `i'+2
-
- **Step 70: Create a three-year balanced sample
- disp "Step 70 "
- * year i firms with no partner in year j (status "1")
- use m`i'-m`j'.dta,clear
- keep if match_status_`i'_`j' == "1"
- keep *`i'
- compress
- saveold unmatched`i'.10.dta,replace
- * remaining i-j records (matched, or year j only), keyed on the year j code
- use m`i'-m`j'.dta,clear
- drop if match_status_`i'_`j' == "1"
- gen code = id`j'+string(revenue`j')+string(employment`j')+string(profit`j')+province`j'
- sort code
- compress
- saveold m`i'-m`j'.10.dta,replace
-
- * year k firms with no partner in year j (status "2").
- * NOTE(review): the wildcard also keeps match_status_j_k and match_method_j_k
- * because their names end in k; both are dropped again at the end of the pass.
- use m`j'-m`k'.dta,clear
- keep if match_status_`j'_`k' == "2"
- keep *`k'
- compress
- saveold unmatched`k'.10.dta,replace
- * remaining j-k records (matched, or year j only), keyed on the year j code
- use m`j'-m`k'.dta,clear
- drop if match_status_`j'_`k' == "2"
- gen code = id`j'+string(revenue`j')+string(employment`j')+string(profit`j')+province`j'
- sort code
- compress
- saveold m`j'-m`k'.10.dta,replace
- * join the two pair files through year j; firms matched in both pairs form
- * the balanced three-year sample (match_method_i_k records the linking year j)
- use m`i'-m`j'.10.dta,clear
- merge 1:1 code using m`j'-m`k'.10.dta
- drop _m code
- keep if match_status_`i'_`j'=="3" & match_status_`j'_`k'=="3"
- gen match_status_`i'_`k'="3"
- gen match_method_`i'_`k'="`j'"
- compress
- saveold balanced.m`i'-m`j'-m`k'.dta,replace
-
- **Step 80: Create files for unmatched `i' firms and `k' firms**
- disp "Step 80"
- * records outside the balanced sample that have a year i firm, keyed on year i
- use m`i'-m`j'.10.dta,clear
- merge 1:1 code using m`j'-m`k'.10.dta
- drop _m code
- drop if match_status_`i'_`j'=="3" & match_status_`j'_`k'=="3"
- drop if id`i'==""
- gen code = id`i'+string(revenue`i')+string(employment`i')+string(profit`i')+province`i'
- sort code
- compress
- saveold unmatched`i'.15.dta,replace
-
- * candidate year i firms for the direct i-k match: the ones above plus those
- * that had no partner in year j
- use unmatched`i'.15.dta,clear
- keep *`i'
- append using unmatched`i'.10.dta
- compress
- saveold unmatched`i'.20.dta,replace
-
-
- * records outside the balanced sample that have a year k firm, keyed on year k
- use m`i'-m`j'.10.dta,clear
- merge 1:1 code using m`j'-m`k'.10.dta
- drop _m code
- drop if match_status_`i'_`j'=="3" & match_status_`j'_`k'=="3"
- drop if id`k'== ""
- gen code = id`k'+string(revenue`k')+string(employment`k')+string(profit`k')+province`k'
- sort code
- compress
- saveold unmatched`k'.15.dta,replace
-
- * candidate year k firms for the direct i-k match
- use unmatched`k'.15.dta,clear
- keep *`k'
- append using unmatched`k'.10.dta
- compress
- saveold unmatched`k'.20.dta,replace
-
-
- * every record outside the balanced sample, keyed on year j
- use m`i'-m`j'.10.dta,clear
- merge 1:1 code using m`j'-m`k'.10.dta
- drop _m code
- drop if match_status_`i'_`j'=="3" & match_status_`j'_`k'=="3"
- gen code = id`j'+string(revenue`j')+string(employment`j')+string(profit`j')+province`j'
- sort code
- compress
- saveold unmatched`j'.15.dta,replace
-
-
-
- **Step 90: Match `i' firms and `k' firms by firm ID and name**
-
-
- *ID*
- disp "Step 90"
- * Same split as in the two-year match: duplicated keys are set aside, unique
- * keys are merged 1:1. The file names duplicates_ID, match and
- * unmatched_by_ID are reused, so earlier files of the same name are overwritten.
- use unmatched`i'.20.dta,clear
- bysort id`i': keep if _N>1
- compress
- saveold duplicates_ID`i'.dta,replace
-
- use unmatched`i'.20.dta,clear
- bysort id`i': drop if _N>1
- rename id`i' id
- keep *`i' id
- sort id
- compress
- saveold match`i'.1.dta,replace
-
- use unmatched`k'.20.dta,clear
- bysort id`k': keep if _N>1
- compress
- saveold duplicates_ID`k'.dta,replace
-
- use unmatched`k'.20.dta,clear
- bysort id`k': drop if _N>1
- rename id`k' id
- keep *`k' id
- sort id
- compress
- saveold match`k'.1.dta,replace
-
- * year i and year k firms sharing an ID.
- * NOTE(review): match_method_i_k is set to the year j value here, not to
- * "ID" as in the two-year match - confirm this is intended.
- use match`i'.1.dta,clear
- merge 1:1 id using match`k'.1.dta
- keep if _m==3
- gen id`i'=id
- rename id id`k'
- drop _m
- gen match_method_`i'_`k'="`j'"
- gen match_status_`i'_`k'="3"
- compress
- saveold matched_by_ID`i'_`k'.dta,replace
-
- *name*
-
- * year i pool (ID-unmatched plus duplicated IDs): duplicated names
- use match`i'.1.dta, clear
- merge 1:1 id using match`k'.1.dta
- keep if _merge==1
- rename id id`i'
- append using duplicates_ID`i'.dta
- bysort name`i': keep if _N>1
- keep *`i'
- compress
- saveold duplicates_name`i'.dta, replace
-
- * year i pool: unique names, keyed on name
- use match`i'.1.dta, clear
- merge 1:1 id using match`k'.1.dta
- keep if _merge==1
- rename id id`i'
- append using duplicates_ID`i'.dta
- bysort name`i': drop if _N>1
- rename name`i' name
- sort name
- keep name *`i'
- compress
- saveold unmatched_by_ID`i'.dta, replace
- * year k pool: duplicated names
- use match`i'.1.dta, clear
- merge 1:1 id using match`k'.1.dta
- keep if _merge==2
- rename id id`k'
- append using duplicates_ID`k'.dta
- bysort name`k': keep if _N>1
- keep *`k'
- compress
- saveold duplicates_name`k'.dta, replace
- * year k pool: unique names, keyed on name
- use match`i'.1.dta, clear
- merge 1:1 id using match`k'.1.dta
- keep if _merge==2
- rename id id`k'
- append using duplicates_ID`k'.dta
- bysort name`k': drop if _N>1
- rename name`k' name
- sort name
- keep name *`k'
- compress
- saveold unmatched_by_ID`k'.dta, replace
-
- * year i and year k firms sharing a name
- use unmatched_by_ID`i'.dta, clear
- merge 1:1 name using unmatched_by_ID`k'.dta
- keep if _merge==3
- gen name`i'=name
- rename name name`k'
- drop _merge
- gen match_method_`i'_`k'="firm name"
- gen match_status_`i'_`k'="3"
- compress
- saveold matched_by_name`i'_`k'.dta, replace
-
- * year i firms still without a year k partner (status "1")
- use unmatched_by_ID`i'.dta, clear
- merge 1:1 name using unmatched_by_ID`k'.dta
- keep if _merge==1
- rename name name`i'
- keep *`i'
- append using duplicates_name`i'.dta
- gen match_method_`i'_`k'=""
- gen match_status_`i'_`k'="1"
- compress
- saveold unmatched_by_ID_and_name_`i'.dta, replace
-
-
- * year k firms still without a year i partner (status "2")
- use unmatched_by_ID`i'.dta, clear
- merge 1:1 name using unmatched_by_ID`k'.dta
- keep if _merge==2
- rename name name`k'
- keep *`k'
- append using duplicates_name`k'.dta
- gen match_method_`i'_`k'=""
- gen match_status_`i'_`k'="2"
- compress
- saveold unmatched_by_ID_and_name_`k'.dta, replace
-
-
- **step 100: merge the files**
- disp "Step 100"
- * stack the direct i-k results
- use matched_by_ID`i'_`k'.dta, clear
- append using matched_by_name`i'_`k'.dta
- append using unmatched_by_ID_and_name_`i'.dta
- append using unmatched_by_ID_and_name_`k'.dta
- compress
- saveold m`i'-m`k'.dta, replace
-
- * bring back the year j information attached to year i firms; this uses the
- * old-style merge syntax (key list without 1:1), on data sorted by code
- use m`i'-m`k'.dta, clear
- gen code = id`i'+string(revenue`i')+string(employment`i')+string(profit`i')+province`i'
- sort code
- *drop if code == "..."
- merge code using unmatched`i'.15.dta
- drop code _merge
- sort id`i'
- compress
- saveold m`i'-m`k'.05.dta, replace
-
- *deal with disagreement (_merge==5 if "update" is used)*
- * records where the year j information reached through year k conflicts with
- * the one already attached through year i; the year k variables are removed
- * from these records and they are saved separately
- use m`i'-m`k'.05.dta, clear
- gen code = id`k'+string(revenue`k')+string(employment`k')+string(profit`k')+province`k'
- sort code
-
- merge code using unmatched`k'.15.dta, update
- keep if _merge==5
- drop *`k'
- drop code _merge
- sort id`i'
- compress
- compress
- saveold m`i'-m`k'.disagree.dta, replace
- * replace the conflicting records by their version without year k variables
- use m`i'-m`k'.05.dta, clear
- merge id`i' using m`i'-m`k'.disagree.dta
- drop if _merge==3
- drop _merge
- append using m`i'-m`k'.disagree.dta
-
- * fill still-missing values from the year k and year j key files; update
- * leaves values that are already present in memory unchanged
- gen code = id`k'+string(revenue`k')+string(employment`k')+string(profit`k')+province`k'
- sort code
- merge code using unmatched`k'.15.dta, update
- drop code _merge
- gen code = id`j'+string(revenue`j')+string(employment`j')+string(profit`j')+province`j'
- sort code
- merge code using unmatched`j'.15.dta, update
- drop code _merge
- compress
- saveold m`i'-m`k'.dta.10.dta, replace
-
- * add the balanced sample, drop the pairwise bookkeeping variables and
- * classify every record by the years in which a firm ID is present
- use m`i'-m`k'.dta.10.dta, clear
- append using balanced.m`i'-m`j'-m`k'.dta
- drop match_status_`i'_`j'
- drop match_status_`j'_`k'
- drop match_status_`i'_`k'
- drop match_method_`i'_`j'
- drop match_method_`j'_`k'
- drop match_method_`i'_`k'
- gen match_status_`i'_`j'_`k'="`i'-`j'-`k'" if id`i'!=""&id`j'!=""&id`k'!=""
- replace match_status_`i'_`j'_`k'="`i'-`j' only" if id`i'!=""&id`j'!=""&id`k'==""
- replace match_status_`i'_`j'_`k'="`j'-`k' only" if id`i'==""&id`j'!=""&id`k'!=""
- replace match_status_`i'_`j'_`k'="`i'-`k' only" if id`i'!=""&id`j'==""&id`k'!=""
- replace match_status_`i'_`j'_`k'="`i' no match" if id`i'!=""&id`j'==""&id`k'==""
- replace match_status_`i'_`j'_`k'="`j' no match" if id`i'==""&id`j'!=""&id`k'==""
- replace match_status_`i'_`j'_`k'="`k' no match" if id`i'==""&id`j'==""&id`k'!=""
- compress
- saveold unbalanced.`i'-`j'-`k'.dta, replace
-
- }
- * Report the three-year match status for every triple of consecutive years.
- forvalues first = 1998/2005 {
- local second = `first' + 1
- local third = `first' + 2
- use "unbalanced.`first'-`second'-`third'.dta", clear
- tabulate match_status_`first'_`second'_`third'
- }
- *************************************
- * *
- * STAGE 3: Create a Ten-Year Panel *
- * *
- *************************************
- * Seed the panel with the 1998-1999-2000 triple. code is the join key built
- * from the latest year already in the panel (id + revenue + employment +
- * profit + province of that year).
- use unbalanced.1998-1999-2000.dta, clear
- tab match_status_1998_1999_2000
- gen code=id2000+string(revenue2000)+string(employment2000)+string(profit2000)+province2000
- sort code
- compress
- saveold test1.dta, replace
- **steps 110-170: add year k (2001 ~ 2007) from the triple i-j-k, one year per pass**
- * The seven steps were identical except for the years, so they run as one
- * loop. test1.dta holds the panel built so far; test2.dta ~ test7.dta are
- * scratch files overwritten on every pass.
- forvalues k = 2001/2007{
- local j = `k'-1
- local i = `k'-2
- local triple unbalanced.`i'-`j'-`k'.dta
- local status match_status_`i'_`j'_`k'
- disp "Stage 3: add `k' from `i'-`j'-`k'"
-
- * (a) firms present in both j and k: attach them through the year j key
- use `triple', clear
- tab `status'
- keep if `status'=="`i'-`j'-`k'"|`status'=="`j'-`k' only"
- gen code=id`j'+string(revenue`j')+string(employment`j')+string(profit`j')+province`j'
- sort code
- compress
- saveold test2.dta, replace
- use test1.dta, clear
- merge code using test2.dta
- tab _merge
- drop _merge code
- * re-key the panel on year i for the links that skip year j
- gen code=id`i'+string(revenue`i')+string(employment`i')+string(profit`i')+province`i'
- sort code
- compress
- saveold test3.dta, replace
-
- * (b) firms linked i-k only (absent in year j)
- use `triple', clear
- tab `status'
- keep if `status'=="`i'-`k' only"
- gen code=id`i'+string(revenue`i')+string(employment`i')+string(profit`i')+province`i'
- sort code
- compress
- saveold test4.dta, replace
- * update fills missing panel values from test4 and keeps existing ones
- use test3.dta, clear
- merge code using test4.dta, update
- tab _merge
- drop code _merge
- compress
- saveold test5.dta, replace
-
- * (c) conflicting links (_merge==5): the year k variables taken from test4
- * are kept as separate records
- use test3.dta, clear
- merge code using test4.dta, update replace
- tab _merge
- keep if _merge==5
- keep *`k'
- compress
- saveold test6.dta, replace
-
- * (d) year k firms with no link to year i or year j
- use `triple', clear
- keep if `status'=="`k' no match"
- display _N
- compress
- saveold test7.dta, replace
-
- * assemble the panel through year k
- use test5.dta, clear
- append using test6.dta
- dis _N
- append using test7.dta
- dis _N
- * re-key on year k for the next pass; the last year needs no key, so the
- * final panel carries no code variable
- if `k'<2007{
- gen code=id`k'+string(revenue`k')+string(employment`k')+string(profit`k')+province`k'
- sort code
- compress
- saveold test1.dta, replace
- }
- }
- * the data in memory is now the panel through 2007
- drop match_status*
- compress
- saveold unbalanced.1998--2007.dta, replace
- /*
- /*erase files*/
- local file_list: dir . files "*.dta"
- foreach file of local file_list{
- if "`file'" == "unbalanced.1998--2007.dta"{
- continue
- }
- disp "erase `file'"
- erase "`file'"
- }
- */
- * Build the firm-year crosswalk PanelID_1998-2007.dta: one row per panel
- * firm and year, linking id_in_panel to the record id of the yearly source file.
- * The load below is disabled, so this relies on the ten-year panel
- * (unbalanced.1998--2007.dta) still being the data in memory.
- *use "unbalanced.1998--2007.dta",clear
- keep id_in_source*
- * long instead of the default float: a float cannot hold every integer
- * above 16,777,216, which would make large row numbers collide
- gen long id_in_panel=_n
- reshape long id_in_source, i(id_in_panel) j(year)
- * years in which the firm does not appear have no source record
- drop if id_in_source == .
- sort id_in_panel year
- saveold "PanelID_1998-2007.dta",replace
复制代码
|
|