-
Notifications
You must be signed in to change notification settings - Fork 0
/
Step5_CleaningPop.do
128 lines (104 loc) · 2.69 KB
/
Step5_CleaningPop.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
******"DataCleaningPop"
cd "/Users/katarzyna/Desktop/STATA Code/Raw data"
clear all
*** 1. Import the datafile
insheet using NEWpop.csv, clear
*** 2. Keep the country-years of interest
gen keep = inlist(country, 2090, 2450, 3160, 4010, 4020, 4030, 4045, 4050, 4070, 4080, 4085, 4150, 4180, 4210, 4220, 4230, 4240, 4270, 4274, 4276, 4280, 4290, 4300, 4308, 5020)
keep if keep==1 & year>=1990
drop admin1 subdiv frmat pop1 lb keep
*** 3. Reshape pop counts from wide to long
isid country year sex
egen pop27 = rowtotal(pop3-pop6)
drop pop3-pop6
rename pop27 pop3
reshape long pop, i(country year sex) j(age)
*** 4. Clean age & sex
tab sex
label define sex 1 "male" 2 "female" 9 "unspecified"
label values sex sex
replace age = age-4
recode age (-2=1) (-1=2) (22=99)
#delimit;
label define age
0 "0-4"
1 "0"
2 "1-4"
3 "5-9"
4 "10-14"
5 "15-19"
6 "20-24"
7 "25-29"
8 "30-34"
9 "35-39"
10 "40-44"
11 "45-49"
12 "50-54"
13 "55-59"
14 "60-64"
15 "65-69"
16 "70-74"
17 "75-79"
18 "80-84"
19 "85-89/85+"
20 "90-94"
21 "95+"
99 "unspecified" ;
#delimit cr
label values age age
#delimit;
label define country
2090 "Canada"
2450 "United States of America"
3160 "Japan"
4010 "Austria"
4020 "Belgium"
4030 "Bulgaria"
4045 "Czech Republic"
4050 "Denmark"
4070 "Finland"
4080 "France"
4085 "Germany"
4150 "Hungary"
4180 "Italy"
4210 "Netherlands"
4220 "Norway"
4230 "Poland"
4240 "Portugal"
4270 "Romania"
4274 "Slovakia"
4276 "Slovenia"
4280 "Spain"
4290 "Sweden"
4300 "Switzerland"
4308 "United Kingdom"
5020 "Australia" ;
#delimit cr
label values country country
*** 5. Merge in additional pop data from external sources (i.e., national statistical offices)
tab country year if pop==. & age!=20 & age!=21
gen source = 1
foreach add in NEW_pop_canada NEW_pop_france NEW_pop_us {
append using `add'.dta
}
recode source (.=2)
label define source 1 "WHO" 2 "NSO"
label values source source
sort country year sex age
*** 6. Save the harmonized data
cd "/Users/katarzyna/Desktop/STATA Code/Raw data"
save NEW_pop.dta, replace
*******"MergeAll"
cd "/Users/katarzyna/Desktop/STATA Code/Raw data"
clear all
*** Merge in cleaned population counts.
***The icd10y.dta file includes information on the year when each country switched from ICD9 to ICD10. I used this to create figures and show that the two coding schemes are compatible/comparable for the causes of death under study. I created the icd10y.dta file based on the WHO availability excel file. The icd10y.dta file is available in GitHub repository.
foreach var in icd910 icd10 {
use NEW_death_`var', clear
merge m:1 country year sex age using NEW_pop.dta
sort country year sex age cause
drop if _merge==2
drop _merge
merge m:1 country using icd10y.dta, nogen
save NEW_dat_`var', replace
}