* pakistan_ita_aser2018.do
* Imports the ASER 2018 rural child workbook and clones the child-age column.
* (The file name above is now a comment; as a bare line Stata would try to run it as a command.)

* use "D:\SABA\Inequality Measures\Data Analysis\ASER2018EXPORTED.dta" 

cd "C:\Dropbox\BerkeleyMIDS\projects\unesco_chapter"
* firstrow: take variable names from the first spreadsheet row.
* clear: allow the import even when a dataset is already in memory.
import excel "./data/aser/ASER2018 Rural Data/ITAASER2018Child.xlsx", firstrow clear

tab C001 // TS: Age of child

* Keep a readable copy of the age column; C001 itself stays in the data.
clonevar age = C001

* ---- By categories of learning outcomes ----

* lorenz estimate WINDEX10, over(READ) graph(aspectratio(1))
// TS: wealth index, local-language reading.
// The raw file has no WINDEX10, only WINDEX, so WINDEX is used below.
rename C010 reading_local
lorenz estimate WINDEX, graph(aspectratio(1)) over(reading_local)

// TS: This seems to get the analysis backward: it shows the distribution of WEALTH
// within the subpopulations defined by the kids' performance, i.e. it is not showing
// learning poverty, just poverty.
// To look at inequality in educational performance the roles have to be swapped:
lorenz estimate reading_local, gini graph(aspectratio(1)) over(WINDEX)


* ---- Gini coefficient ----

* w is the exponential of WSCORE.
generate w = exp(WSCORE)
* Store in variable gini the Gini index of w, computed within each READ category.
* NOTE(review): inequal() is not an official egen function; presumably it comes
* from the SSC add-on egen_inequal -- confirm before running.
egen gini = inequal(w), index(gini) by(READ)
// TS: I don't have the -inequal- package. Surfing around on Statalist
// (https://www.statalist.org/forums/forum/general-stata-discussion/general/1360518-how-to-combine-the-command-inequal-with-by)
// it seems to refer to this:
        /*
        *! version 2.1.0  9/8/94        sg30: STB-23
        *Edward Whitehouse  Institute for Fiscal Studies
        pr def inequal
          version 3.1
          set more 1
          local varlist "req ex max(1)"
          local if "opt"
          local in "opt"
          local weight "fweight"
          parse "`*'"
          confirm new var _use _i _temp 
          di
          di in green "inequality measures of " in yellow "`varlist'"
          di in green _d(78) "-"
          quietly { 
            preserve
            gen byte _use = 1 `if' `in'
            keep if _use==1
            su `varlist' [`weight'`exp']
            local mn = _result(3)
            local tot = _result(1)
            local vari = _result(4)
            sort `varlist'
            local wt : word 2 of `exp'
            if "`wt'"=="" {gen _i = [_n]
                           local wt = 1}
            else gen _i = sum(`wt')
        * relative mean deviation
            gen _temp = sum(`wt'*abs(`varlist'-`mn')) 
            local rmd = _temp[_N]/(2*`mn'*`tot')
        * coefficient of variation
            local cov = `vari'^0.5/`mn'
        * standard deviation of logs
            replace _temp = log(`varlist')
            su _temp [`weight'`exp']
            local sdl = (_result(4))^0.5
        * gini
            replace _temp = sum(`wt'*_i*(`varlist'-`mn'))
            local gini = (2*_temp[_N])/(`tot'^2*`mn')
        * mehran 
            replace _temp = sum(`wt'*_i*(2*`tot'+1 -_i)*(`varlist' - `mn'))
            local mehran = (3*_temp[_N])/(`tot'^3*`mn')
        * piesch
            replace _temp = sum(`wt'*_i*(_i-1)*(`varlist'-`mn'))
            local piesch = 3*_temp[_N]/(2*`tot'^3*`mn')
        * kakwani
            replace _temp = sum(`wt'*((`varlist'^2+`mn'^2)^0.5))
            local kakwani = (1/(2-2^0.5))*((_temp[_N]/(`tot'*`mn')-2^0.5))
        * theil 
            replace _temp = sum(`wt'*((`varlist'/`mn')*(log(`varlist'/`mn'))))
            local theil = _temp[_N]/`tot'
        * mean log deviation
            replace _temp = sum(`wt'*(log(`mn'/`varlist')))
            local mld = _temp[_N]/`tot'
            }
            di in green "relative mean deviation " _col(40) in yellow `rmd'
            di in green "coefficient of variation" _col(40) in yellow `cov'
            di in green "standard deviation of logs" _col(40) in yellow `sdl'
            di in green "Gini coefficient" _col(40) in yellow `gini'
            di in green "Mehran measure" _col(40) in yellow `mehran'
            di in green "Piesch measure" _col(40) in yellow `piesch'
            di in green "Kakwani measure" _col(40) in yellow `kakwani'
            di in green "Theil entropy measure" _col(40) in yellow `theil'
            di in green "Theil mean log deviation measure" _col(40) in yellow `mld'
            di in green _d(78) "-"
          end
         */
// TS: They recommend using ineqdeco instead, so adapting your command:
ineqdeco w, by(reading_local)

// TS: But I think what we want to be doing is not looking at wealth inequality by reading performance,
// but rather learning inequality by (whatever - in this case, you seem to want wealth quintile)
// NOTE(review): w = exp(WSCORE) looks continuous; by() probably needs a binned
// grouping variable (e.g. wealth quintiles) -- confirm before relying on this output.
ineqdeco reading_local, by(w)

// TS: I cannot replicate this section
* Plots the per-READ Gini (variable gini, created by the egen call above) against READ.
scatter gini READ, connect(l) sort
* Same plot restricted to children under 10. The restriction has to be an -if-
* qualifier; the original "(C001<10)" after the varlist was not valid syntax.
scatter gini READ if C001<10, connect(l) sort

tab C005

* -capture- keeps the script running when GRADE is not in the data.
capture drop GRADE

*** Numeric grade variable for the early grades (pre-levels assessment) ***

* C005 is compared as a string. The pre-school labels collapse to 0; "1" and "2"
* map to grades 1 and 2; every other value leaves Grade missing.
* (The original conditions ended in a stray "." after the string literal.)
gen Grade = .
replace Grade = 0 if inlist(C005, "ECE", "KG", "Kachi", "Nursery", "PG", "Prep")
replace Grade = 1 if C005 == "1"
replace Grade = 2 if C005 == "2"

**** Cross-check the coding ****

tab C005 Grade

***** Keep only the early grades in the data *****

* Missing Grade is not < 3 in Stata, so unmapped C005 values are dropped here too.
keep if Grade < 3
tab C005

**Switching to concerned file***

// TS: Which file is this? Is there an omitted -save- command above, or...?
// I will continue to work within the existing dataset for now and comment it out
* use "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta" 

tab C001

* Age: C001 copied for ages 1-10, top-coded at 11 for anything above 10.
* Values outside that set (0, non-integers, missing) stay missing.
gen Age = .
replace Age = C001 if inlist(C001, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
* Stata treats missing as larger than any number, so a plain "C001>10" would
* put children with missing age into the 11+ group; exclude them explicitly.
replace Age = 11 if C001 > 10 & !missing(C001)

tab Age
tab C001
tab Age C001

***truncating 2.5% doubtful data**

//TS: I had renamed it to age already
* keep if C001<14
* Observations with missing age are dropped as well (missing is not < 14).
keep if age < 14

tab C001 C005

tab Age

// TS: So this was both the source and the destination file? The -use- above was
// inserted at a later stage, I guess?
* save "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta", replace

* Medu: three-level recode of PR002 (0 -> 0, 1-5 -> 1, 6-15 -> 2).
* -capture drop- makes each recode re-runnable and does not abort when the
* variable is absent (it is absent right after a fresh import).
capture drop Medu
gen Medu = .
replace Medu = 0 if PR002 == 0
replace Medu = 1 if inlist(PR002, 1, 2, 3, 4, 5)
replace Medu = 2 if inlist(PR002, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

tab Medu PR002

tab PR009

* Fedu: same recode applied to PR009; the top band runs to 16 here (15 for Medu).
capture drop Fedu
gen Fedu = .
replace Fedu = 0 if PR009 == 0
replace Fedu = 1 if inlist(PR009, 1, 2, 3, 4, 5)
replace Fedu = 2 if inlist(PR009, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)

tab Fedu PR009

tab PR009

* Pedu: combined indicator built from Medu and Fedu.
capture drop Pedu
gen Pedu = .

// TS: I'm not familiar with this syntax, and I cannot get it to run on my machine.
// Fixed: "^" is Stata's power operator, not a logical AND. "Medu==0 ^ Fedu==0" does
// not test both conditions; the intended "both parents at level 0" needs "&".
replace Pedu = 0 if Medu == 0 & Fedu == 0
* NOTE(review): only level 1 is tested here, so a household where the higher of the
* two parents is at level 2 keeps Pedu missing -- confirm whether ">= 1" was intended.
replace Pedu = 1 if Medu == 1 | Fedu == 1

tab Pedu Medu

tab Fedu Medu
list Pedu Medu Fedu

tab C002

//TS: making a change here for myself...
* female is a copy of C002 with the code -1 folded into 1.
clonevar female = C002
recode female (-1 = 1)

summarize C002

* -capture- so a missing Itype (e.g. on a fresh import) does not stop the script.
capture drop Itype
tab C006

* Itype: C006 collapsed to three groups (1, 2, and 0/3/4 together).
gen Itype = .
replace Itype = 1 if C006 == 1
replace Itype = 2 if C006 == 2
replace Itype = 3 if inlist(C006, 0, 3, 4)

* Define each value label before attaching it; -replace- keeps the block re-runnable.
label define Itype1 1 "Public Institution"  2 "Private Institution" 3 "Other Institution Type", replace
label values Itype Itype1
label define Grade1 0 "Pre-School Levels"  1 "Grade 1" 2 "Grade 2", replace
label values Grade Grade1
* The original repeated -label define Grade1 2 "Grade 2"- here. Grade1 already exists
* at that point, so the second definition stops the do-file with an error; it was
* redundant and has been removed.

* Leftover from the auto.dta help example; no variable in this file uses it.
* label define foreignl 0 "domestic  car"  1 "foreign  car"

tab Itype C006


* Load the saved analysis file. -clear- is required because the data in memory
* has been modified above; without it -use- refuses to run.
* NOTE(review): this replaces the in-memory data, so every variable used below
* (READ, Numeracy, C013, WSCORE, Age, SEX, Grade, ...) must already exist in
* EGRAEGMA.dta. The path is machine-specific; the earlier -use-/-save- of this
* same file are commented out above -- confirm which dataset is intended.
use "D:\SABA\Inequality Measures\Data Analysis\EGRAEGMA.dta", clear

****GiniCoefficients****


* descogini and ginidesc are user-written commands and must be installed.
* Each call takes one learning outcome (READ, Numeracy, C013).
descogini READ WSCORE Age SEX Grade, d(4)
descogini Numeracy WSCORE Age SEX Grade, d(4)
descogini C013 WSCORE Age SEX Grade, d(4)

ginidesc READ, by(Grade)
ginidesc Numeracy, by(Grade)
ginidesc C013, by(Grade)

**** Lorenz curves - English (C013) ****


* Overall curve first.
lorenz estimate C013
lorenz graph, aspectratio(1) xlabel(, grid)

* Then one estimate + overlaid graph per grouping variable, in the original order.
foreach grp in C002 SEX RID Grade WINDEX10 Medu Fedu Pedu Itype {
    lorenz estimate C013, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}

**** Lorenz curves - Urdu (READ) ****


* Overall curve first.
lorenz estimate READ
lorenz graph, aspectratio(1) xlabel(, grid)

* Then one estimate + overlaid graph per grouping variable, in the original order.
foreach grp in C002 RID Grade WINDEX5 Medu Fedu Itype {
    lorenz estimate READ, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}


**** Lorenz curves - Numeracy ****


* Overall curve first.
lorenz estimate Numeracy
lorenz graph, aspectratio(1) xlabel(, grid)

* Then one estimate + overlaid graph per grouping variable, in the original order.
foreach grp in C002 RID Grade WINDEX5 Medu Itype {
    lorenz estimate Numeracy, over(`grp') graph(aspectratio(1))
    lorenz graph, overlay aspectratio(1) xlabel(, grid)
}

*******Summary Statistics for Learning Outcomes******

summarize READ Numeracy C013
tab Grade, sum(READ)
tab Grade, sum(Numeracy)
tab Grade, sum(C013)

* -tabstat- has no "mode" statistic, so the original stat(mode sd min max) call
* could not run. The per-grade mode is computed with egen's mode() instead
* (minmode: ties resolve to the lowest value) and shown via tabstat; stat(min)
* simply prints the value, which is constant within each grade.
foreach v in C013 READ Numeracy {
    capture drop mode_`v'
    egen mode_`v' = mode(`v'), by(Grade) minmode
}
tabstat mode_C013 mode_READ mode_Numeracy, by(Grade) stat(min) nototal
drop mode_C013 mode_READ mode_Numeracy

* Remaining statistics from the original request.
tabstat C013 READ Numeracy, by(Grade) stat(sd min max) nototal

* Leftover from the auto.dta help example; these variables are not in this dataset.
* tabstat price weight mpg rep78, by(foreign) stat(mean sd min max) nototal