* pakistan_ita_aser2018.do
* Stata do-file: inequality measures (Lorenz curves, Gini coefficients) on ASER 2018 child-level data.
* Section: load the raw child-level workbook and draw the first Lorenz curves.
* use "D:\SABA\Inequality Measures\Data Analysis\ASER2018EXPORTED.dta"
cd "C:\Dropbox\BerkeleyMIDS\projects\unesco_chapter"
* -clear- allows the do-file to be re-run while a dataset is already in memory;
* without it -import excel- refuses to overwrite unsaved data and stops.
import excel "./data/aser/ASER2018 Rural Data/ITAASER2018Child.xlsx", firstrow clear
tab C001 // TS: Age of child
clonevar age = C001
* ---- BY CATEGORIES OF LEARNING OUTCOMES ----
* (banner rewritten as a plain * comment; the original started with a run of slashes)
* lorenz estimate WINDEX10, over(READ) graph(aspectratio(1))
// TS: wealth index, local-language reading
// I don't have WINDEX10 from the raw file, only WINDEX
ren C010 reading_local
lorenz estimate WINDEX, over(reading_local) graph(aspectratio(1))
// TS: This seems to get the analysis backward: it's showing the distribution of WEALTH
// w.r.t. the subpopulations generated by the kids' performance. i.e., it's not showing
// us learning poverty, it's just showing us poverty.
// We need to reverse the command if we want to be looking at inequality in educational performance
lorenz estimate reading_local, over(WINDEX) graph(aspectratio(1)) gini
* ---- Gini coefficient ----
* NOTE(review): the original banner began with "/////". A leading "///" may be read by Stata
* as a line-continuation comment, which would swallow the -gen- below; a plain * comment
* avoids the question. Confirm against the log.
* w is the exponential of the wealth score WSCORE.
gen w = exp(WSCORE)
* inequal() is not an official -egen- function; it needs a user-written add-on (see the notes below).
egen gini = inequal(w) , by(READ) index(gini)
// TS: I don't have the -inequal- package. Surfing around on Statalist
// (https://www.statalist.org/forums/forum/general-stata-discussion/general/1360518-how-to-combine-the-command-inequal-with-by)
// it seems to refer to this:
/*
*! version 2.1.0 9/8/94 sg30: STB-23
*Edward Whitehouse Institute for Fiscal Studies
pr def inequal
version 3.1
set more 1
local varlist "req ex max(1)"
local if "opt"
local in "opt"
local weight "fweight"
parse "`*'"
confirm new var _use _i _temp
di
di in green "inequality measures of " in yellow "`varlist'"
di in green _d(78) "-"
quietly {
preserve
gen byte _use = 1 `if' `in'
keep if _use==1
su `varlist' [`weight'`exp']
local mn = _result(3)
local tot = _result(1)
local vari = _result(4)
sort `varlist'
local wt : word 2 of `exp'
if "`wt'"=="" {gen _i = [_n]
local wt = 1}
else gen _i = sum(`wt')
* relative mean deviation
gen _temp = sum(`wt'*abs(`varlist'-`mn'))
local rmd = _temp[_N]/(2*`mn'*`tot')
* coefficient of variation
local cov = `vari'^0.5/`mn'
* standard deviation of logs
replace _temp = log(`varlist')
su _temp [`weight'`exp']
local sdl = (_result(4))^0.5
* gini
replace _temp = sum(`wt'*_i*(`varlist'-`mn'))
local gini = (2*_temp[_N])/(`tot'^2*`mn')
* mehran
replace _temp = sum(`wt'*_i*(2*`tot'+1 -_i)*(`varlist' - `mn'))
local mehran = (3*_temp[_N])/(`tot'^3*`mn')
* piesch
replace _temp = sum(`wt'*_i*(_i-1)*(`varlist'-`mn'))
local piesch = 3*_temp[_N]/(2*`tot'^3*`mn')
* kakwani
replace _temp = sum(`wt'*((`varlist'^2+`mn'^2)^0.5))
local kakwani = (1/(2-2^0.5))*((_temp[_N]/(`tot'*`mn')-2^0.5))
* theil
replace _temp = sum(`wt'*((`varlist'/`mn')*(log(`varlist'/`mn'))))
local theil = _temp[_N]/`tot'
* mean log deviation
replace _temp = sum(`wt'*(log(`mn'/`varlist')))
local mld = _temp[_N]/`tot'
}
di in green "relative mean deviation " _col(40) in yellow `rmd'
di in green "coefficient of variation" _col(40) in yellow `cov'
di in green "standard deviation of logs" _col(40) in yellow `sdl'
di in green "Gini coefficient" _col(40) in yellow `gini'
di in green "Mehran measure" _col(40) in yellow `mehran'
di in green "Piesch measure" _col(40) in yellow `piesch'
di in green "Kakwani measure" _col(40) in yellow `kakwani'
di in green "Theil entropy measure" _col(40) in yellow `theil'
di in green "Theil mean log deviation measure" _col(40) in yellow `mld'
di in green _d(78) "-"
end
*/
// TS: They recommend using ineqdeco instead, so adapting your command:
ineqdeco w, by(reading_local)
// TS: But I think what we want to be doing is not looking at wealth inequality by reading performance,
// but rather learning inequality by (whatever - in this case, you seem to want wealth quintile)
* NOTE(review): w is continuous (exp of WSCORE); -ineqdeco- presumably expects a small set of
* integer group codes in by(), so a quintile variable may be what is needed here - confirm.
ineqdeco reading_local, by(w)
// TS: I cannot replicate this section
* Both plots rely on the variable gini created by the -egen ... inequal()- call earlier in the file.
scatter gini READ ,connect(l) sort
* Restrict to children younger than 10 with an -if- qualifier; the original "(C001<10)" placed
* after the variable list is not valid -scatter- syntax.
scatter gini READ if C001<10, connect(l) sort
* Section: build a numeric Grade (0 = pre-school levels, 1, 2) from the string C005
* and keep only those early grades.
tab C005
* GRADE may not exist when starting from the raw workbook; do not stop if there is nothing to drop.
capture drop GRADE
*** Generating numeric variable for Early Grades for Pre Levels Assessment ****
gen Grade=.
* Stray trailing periods after the string literals were removed from these conditions.
replace Grade=0 if inlist(C005, "ECE", "KG", "Kachi", "Nursery", "PG", "Prep")
replace Grade=1 if C005=="1"
replace Grade=2 if C005=="2"
**** Cross-check the recode against the original codes *****
tab C005 Grade
***** Keeping Early Grades in Data File *****
* Rows whose C005 matched none of the codes above have Grade missing; missing is not < 3,
* so they are dropped here as well.
keep if Grade<3
tab C005
** Switching to concerned file ***
// TS: Which file is this? Is there an omitted -save- command above, or...?
// I will continue to work within the existing dataset for now and comment it out
* use "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta"
tab C001
* Age copies C001 for the values 1-10 and pools everything above 10 into a top code of 11.
gen Age = C001 if inlist(C001, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
* Stata treats a missing value as larger than any number, so "C001>10" alone would also
* code children with no recorded age as 11; exclude missing explicitly.
replace Age=11 if C001>10 & !missing(C001)
tab Age
tab C001
tab Age C001
*** truncating 2.5% doubtful data **
//TS: I had renamed it to age already
* keep if C001<14
* age is the clone of C001 created near the top of the file; missing ages are dropped too,
* because missing is not < 14.
keep if age < 14
tab C001 C005
tab Age
// TS: So this was both the source and the destination file? The -use- above was
// inserted at a later stage, I guess?
* save "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta", replace
* Section: three-level education categories built from PR002 (Medu) and PR009 (Fedu),
* then a combined parental indicator Pedu.
* Medu: 0 if PR002 is 0, 1 if PR002 is 1-5, 2 if PR002 is 6-15.
gen Medu=.
replace Medu=0 if PR002==0
replace Medu=1 if inlist(PR002, 1, 2, 3, 4, 5)
replace Medu=2 if inlist(PR002, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
tab Medu PR002
tab PR009
* Fedu/Pedu may not exist yet when starting from the raw workbook; do not stop on the drop.
capture drop Fedu
* Fedu: 0 if PR009 is 0, 1 if PR009 is 1-5, 2 if PR009 is 6-16.
* NOTE(review): the top band runs to 16 here but only to 15 for Medu - confirm whether
* PR002==16 should also map to Medu==2.
gen Fedu=.
replace Fedu=0 if PR009==0
replace Fedu=1 if inlist(PR009, 1, 2, 3, 4, 5)
replace Fedu=2 if inlist(PR009, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
tab Fedu PR009
tab PR009
capture drop Pedu
gen Pedu=.
// TS: I'm not familiar with this syntax, and I cannot get it to run on my machine. Do we think it is a typo,
// or is drawing upon some user-defined package that I haven't installed, or...?
* The original condition was "Medu==0 ^ Fedu==0."; in Stata "^" is the power operator, not a
* logical connective. "&" is used so that Pedu is 0 only when both parents are in category 0.
replace Pedu=0 if Medu==0 & Fedu==0
replace Pedu=1 if Medu==1 | Fedu==1
* NOTE(review): combinations involving category 2 (without a 1) leave Pedu missing - confirm
* whether a third level is intended.
tab Pedu Medu
tab Fedu Medu
list Pedu Medu Fedu
* Section: sex indicator, institution type (Itype) and value labels for Itype and Grade.
tab C002
//TS: making a change here for myself...
* female is a copy of C002 with the code -1 recoded to 1.
clonevar female = C002
recode female (-1 = 1)
summarize C002
* Itype may not exist yet when starting from the raw workbook; do not stop on the drop.
capture drop Itype
tab C006
* Itype: 1 and 2 are carried over from C006; codes 0, 3 and 4 are pooled into 3.
gen Itype=.
replace Itype=1 if C006==1
replace Itype=2 if C006==2
replace Itype=3 if inlist(C006, 0, 3, 4)
* Define each value label first, then attach it. -replace- keeps a re-run from stopping
* on an already-defined label.
label define Itype1 1 "Public Institution" 2 "Private Institution" 3 "Other Institution Type", replace
label values Itype Itype1
label define Grade1 0 "Pre-School Levels" 1 "Grade 1" 2 "Grade 2", replace
label values Grade Grade1
* Disabled: redefining Grade1 without -modify- stops with "label already defined", and the
* value 2 is already labelled above.
* labe define Grade1 2 "Grade 2"
* Disabled: leftover from the auto-dataset example; no variable in this file uses this label.
* label define foreignl 0 "domestic car" 1 "foreign car"
tab Itype C006
* Load the prepared early-grade file when it is reachable on this machine; otherwise carry on
* with the dataset already in memory (the other references to this path in the file are
* commented out). -clear- is required because the data in memory has been modified.
capture confirm file "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta"
if _rc == 0 {
    use "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta", clear
}
else {
    display as text "EGRAEGMA.dta not found - continuing with the dataset in memory"
}
**** Gini coefficients ****
* NOTE(review): -descogini- and -ginidesc- appear to be user-written commands; install them
* before running. The variables READ, Numeracy, SEX and WSCORE must exist in the active data.
descogini READ WSCORE Age SEX Grade, d(4)
descogini Numeracy WSCORE Age SEX Grade, d(4)
descogini C013 WSCORE Age SEX Grade, d(4)
ginidesc READ, by(Grade)
ginidesc Numeracy, by(Grade)
ginidesc C013, by(Grade)
* Lorenz curves for the three learning outcomes. For each outcome: one overall curve, then one
* estimate per grouping variable followed by an overlaid graph. The grouping lists differ
* between outcomes exactly as in the original command sequence.
* NOTE(review): English uses WINDEX10 while Urdu and Numeracy use WINDEX5, and SEX/Pedu/Fedu
* are not used for every outcome - confirm whether that is intentional.

**** Lorenz Curve - English *****
lorenz estimate C013
lorenz graph, aspectratio(1) xlabel(, grid)
foreach grp in C002 SEX RID Grade WINDEX10 Medu Fedu Pedu Itype {
    lorenz estimate C013, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}

**** Lorenz Curve - URDU *****
lorenz estimate READ
lorenz graph, aspectratio(1) xlabel(, grid)
foreach grp in C002 RID Grade WINDEX5 Medu Fedu Itype {
    lorenz estimate READ, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}

**** Lorenz Curve - Numeracy *****
lorenz estimate Numeracy
lorenz graph, aspectratio(1) xlabel(, grid)
foreach grp in C002 RID Grade WINDEX5 Medu Itype {
    lorenz estimate Numeracy, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}
******* Summary Statistics for Learning Outcomes ******
summarize READ Numeracy C013
* Mean, standard deviation and frequency of each outcome within Grade.
tab Grade, sum(READ)
tab Grade, sum(Numeracy)
tab Grade, sum(C013)
* -tabstat- has no "mode" statistic, so the original stat(mode ...) request stops with an error.
* The median is reported instead; if the mode is needed, build it first with -egen ... = mode()-.
tabstat C013 READ Numeracy, by(Grade) stat(median sd min max) nototal
* Disabled: leftover from the auto-dataset example; price, weight, mpg, rep78 and foreign are
* not variables of this dataset.
* tabstat price weight mpg rep78, by(foreign) stat(mean sd min max) nototal