-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.R
111 lines (92 loc) · 3.86 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Extract DHS treatment parameters and ACT/non-ACT drug coverage, then prepare malaria model treatment coverage inputs
# Author: AB Hogan
# Date: 6 April 2020
# METHODOLOGY
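# For each country, extract the proportion of fevers for which medical
# treatment was sought, and the proportion that received an anti-malarial that
# is an ACT. Multiply by 80% (estimated scaling factor for receiving
# appropriate treatment). DHS weightings are used to aggregate individual-level
# data at the country level.
# NOTE(review): the 80% scaling described above is not applied in the visible
# code of this script - confirm whether it is applied downstream.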
# Packages
library(rdhs)
library(tidyverse)
library(readr)
# Query the DHS API for surveys carried out in the listed countries, 2010-2019
survs <- dhs_surveys(
  countryIds = c(
    "AF", "AO", "BD", "BJ", "BF", "BO", "BR", "BU", "CF", "CM",
    "CI", "CG", "CD", "CO", "DR", "EC", "EK", "ER", "ET", "GA",
    "GH", "GM", "GN", "GU", "GY", "HT", "HN", "IA", "ID", "KE",
    "KH", "KM", "LA", "LB", "MD", "MW", "ML", "MM", "MN", "MR",
    "MZ", "NG", "NM", "NC", "NI", "PE", "PH", "PK", "RW", "SD",
    "SN", "SL", "TD", "TZ", "TG", "TH", "TL", "VN", "YE", "UG",
    "ZM", "ZW"
  ),
  surveyYear = 2010:2019
)

# Keep only the datasets with fileFormat "FL" and fileType "KR" for those surveys
datasets <- dhs_datasets(
  surveyIds = survs$SurveyId,
  fileFormat = "FL",
  fileType = "KR"
)

# Fetch every matching dataset file
downloads <- get_datasets(datasets$FileName)

# Look up variables whose label mentions artemisinin (kept for inspection)
question_labels <- search_variable_labels(
  datasets$FileName,
  search_terms = "artemisinin"
)

# Variables used by the coverage calculation further down
vars <- c("v005", "b5", "b8", "h22", "h32z", "ml13e")
questions <- search_variables(datasets$FileName, variables = vars)

# Show the first 20 matches (first three columns only) as a sanity check.
# Caveat: this assumes variable codes mean the same thing in every survey,
# which does not always hold.
print(questions[seq_len(20), seq_len(3)])

# Pull the selected variables out of each dataset, with geographic data added
extract <- extract_dhs(questions, add_geo = TRUE)
# Stack the per-survey extracts into a single labelled data frame.
# Each dataset must appear exactly once: a repeated entry duplicates every
# child record of that survey in the saved raw data and inflates its
# weighted counts.
extract_bound <- rbind_labelled(
  extract$AOKR71FL,
  extract$BJKR71FL,
  extract$BFKR7AFL,
  extract$BUKR70FL,
  extract$CMKR61FL,
  extract$CIKR62FL,
  extract$CDKR61FL,
  extract$GNKR62FL,
  extract$GHKR7BFL,
  extract$KEKR72FL,
  extract$LBKR71FL,
  extract$MWKR7IFL,
  extract$MLKR7HFL,
  extract$MZKR7AFL,
  extract$NGKR7AFL,
  extract$SLKR72FL,
  extract$TZKR7BFL,
  extract$TGKR71FL,
  extract$UGKR7BFL,
  extract$ZMKR71FL,
  extract$CGKR61FL,
  extract$ETKR71FL,
  extract$TDKR71FL,
  extract$GAKR61FL,
  extract$GMKR61FL,
  extract$MDKR71FL,
  extract$SNKR7ZFL,
  extract$ZWKR72FL
)
# Cache the combined raw extract on disk
saveRDS(object = extract_bound, file = "DHS_treatment.RDS")

# Reload the cached extract, so the steps below can run from the saved file
extract_bound <- readRDS(file = "DHS_treatment.RDS")

# Country lookup table, joined to the survey results further down
countrylist <- read_csv(file = "countrylist.csv")
# Survey-level weighted counts. Each record contributes v005 / 1e6.
# All three counts share the restriction h22 == 1, b5 == 1 and 1 <= b8 < 5
# (presumably: had fever, child alive, age 1-4 - confirm against the DHS
# recode manual). Rows where any condition is NA are dropped by filter().

# Denominator: weighted number of children with fever
dat_had_fever <- extract_bound %>%
  filter(b8 >= 1, b8 < 5, b5 == 1, h22 == 1) %>%
  group_by(SurveyId) %>%
  summarise(num_children_fever = sum(v005 / 1e6))

# Numerator 1: of those, children additionally flagged h32z == 1
dat_received_treatment <- extract_bound %>%
  filter(b8 >= 1, b8 < 5, b5 == 1, h22 == 1, h32z == 1) %>%
  group_by(SurveyId) %>%
  summarise(received_treatment = sum(v005 / 1e6))

# Numerator 2: of those, children flagged both h32z == 1 and ml13e == 1
dat_received_act <- extract_bound %>%
  filter(b8 >= 1, b8 < 5, b5 == 1, h22 == 1, h32z == 1, ml13e == 1) %>%
  group_by(SurveyId) %>%
  summarise(received_act = sum(v005 / 1e6))
# Combine the three weighted counts per survey and convert them to coverages:
#   drug_cov_1_0 = received_act / num_children_fever
#   drug_cov_0_0 = received_treatment / num_children_fever - drug_cov_1_0
# A survey with no row in a numerator table gets NA for that coverage.
# COUNTRYCODE is the first two characters of SurveyId.
dat_all <- dat_had_fever %>%
  left_join(dat_received_treatment, by = "SurveyId") %>%
  left_join(dat_received_act, by = "SurveyId") %>%
  mutate(
    drug_cov_1_0 = received_act / num_children_fever,
    drug_cov_0_0 = received_treatment / num_children_fever - drug_cov_1_0,
    COUNTRYCODE = substr(SurveyId, 1, 2)
  ) %>%
  dplyr::select(SurveyId, drug_cov_1_0, drug_cov_0_0, COUNTRYCODE)
# Attach the survey-derived coverages to the country list; countries without a
# matching COUNTRYCODE in dat_all keep NA coverages
dat_out <- left_join(countrylist, dat_all, by = "COUNTRYCODE")

# Keep only the identifier and coverage columns
dat_out <- dplyr::select(
  dat_out,
  ISO, NAME_0, CONTINENT, drug_cov_0_0, drug_cov_1_0
)

# Fixed efficacy values, identical for every country
dat_out <- mutate(
  dat_out,
  drug_0_efficacy = 0.75,
  drug_1_efficacy = 0.95
)

# read in hardcoded values for countries where no data available
#...

# Write the final table used as model input
write_csv(dat_out, "drug_coverage_2020.csv")