forked from PYannick/HighFrequencyChecks
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathenumeratorislazy.R
80 lines (72 loc) · 4.15 KB
/
enumeratorislazy.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# WARNING - Generated by {fusen} from dev/function_documentation.Rmd: do not edit by hand
#' @name enumeratorIsLazy
#' @rdname enumeratorIsLazy
#' @title Check the enumerators who pick up less than X answers per specific question
#' @description This function displays the enumerators who picked up fewer than a
#'  specified number of answers for specific questions.
#'  This can be useful for select_multiple questions where the respondent shall
#'  give at least 3 options, for instance.
#'
#' @param ds dataset containing the survey (from kobo): labelled data.frame
#' @param enumeratorID name of the field where the enumerator ID is stored: string
#' @param questionsEnumeratorIsLazy column names from the dataset and the minimum
#'  number of answers expected for each (c(col1=value1,col2=value2,...)): named
#'  vector or list of integer.
#'  The column name is the main part of the name generated by kobo (eg: for the
#'  question 'main_income', kobo will generate one TRUE/FALSE column per possible
#'  answer as 'main_income.work', 'main_income.remittance'..., only the main part
#'  'main_income' has to be specified here)
#'
#' @return result a list that includes:
#'  * dst always NULL for this check (the dataset is not modified)
#'  * ret_log data.frame with one row per enumerator and per checked question,
#'    with the columns enumeratorID, NbErr (number of surveys with fewer answers
#'    than required) and field (name of the checked question)
#'  * var always NULL for this check
#'  * graph always NULL for this check
#'
#' @export enumeratorIsLazy
#' @examples
#' load(system.file("sample_dataset.RData", package = "HighFrequencyChecks"))
#' ds <- sample_dataset
#' enumeratorID <- "enumerator_id"
#' questionsEnumeratorIsLazy <- c(consent_received.shelter_nfi.non_food_items=3,
#'                                consent_received.food_security.main_income=3,
#'                                consent_received.child_protection.boy_risk=3,
#'                                consent_received.child_protection.girl_risk=3)
#'
#' result <- enumeratorIsLazy(ds = ds,
#'                            enumeratorID=enumeratorID,
#'                            questionsEnumeratorIsLazy=questionsEnumeratorIsLazy)
#'
#' knitr::kable(head(result[["ret_log"]], 10))
#'
enumeratorIsLazy <- function(ds = NULL,
                             enumeratorID = NULL,
                             questionsEnumeratorIsLazy = NULL) {
  # `||` short-circuits, so a NULL or non-data.frame `ds` is rejected with the
  # intended message before nrow() is ever evaluated on it.
  if (is.null(ds) || !is.data.frame(ds) || nrow(ds) == 0) {
    stop("Please provide the dataset")
  }
  if (is.null(enumeratorID) || !is.character(enumeratorID) ||
      length(enumeratorID) != 1L || !(enumeratorID %in% colnames(ds))) {
    stop("Please provide the field where the enumerator ID is stored")
  }
  if (is.null(questionsEnumeratorIsLazy) ||
      is.null(names(questionsEnumeratorIsLazy))) {
    stop("Please provide the fields you want to check for (c('field1[.]','field2[.]',...))")
  }

  # Distinct enumerators in a deterministic, locale-independent order.
  # NA is kept as its own group (placed last) so no survey is dropped.
  enumerator <- ds[[enumeratorID]]
  groups <- unique(enumerator)
  groups <- groups[order(groups, na.last = TRUE, method = "radix")]
  group_index <- match(enumerator, groups)

  question_names <- names(questionsEnumeratorIsLazy)
  column_names <- colnames(ds)

  # seq_along() visits every question; each one yields its own small
  # data.frame and they are bound together once at the end.
  per_question <- lapply(seq_along(questionsEnumeratorIsLazy), function(i) {
    question <- question_names[i]
    minimum_answers <- questionsEnumeratorIsLazy[[i]]

    # Literal prefix match on "<question>.": base R only, and dots contained
    # in the question name are not interpreted as regex wildcards.
    is_option <- startsWith(column_names, paste0(question, "."))
    if (!any(is_option)) {
      warning("No column found for question '", question,
              "': every survey is counted as having 0 answers",
              call. = FALSE)
    }

    # drop = FALSE keeps a data.frame even when a single option column matches,
    # which rowSums() requires.
    answers_given <- rowSums(ds[, is_option, drop = FALSE], na.rm = TRUE)
    is_lazy <- answers_given < minimum_answers

    data.frame(
      enumeratorID = groups,
      NbErr = tabulate(group_index[which(is_lazy)], nbins = length(groups)),
      field = rep(question, length(groups)),
      stringsAsFactors = FALSE
    )
  })

  if (length(per_question) > 0) {
    ret_log <- do.call(rbind, per_question)
  } else {
    ret_log <- data.frame(enumeratorID = character(0),
                          NbErr = integer(0),
                          field = character(0),
                          stringsAsFactors = FALSE)
  }

  result <- list(
    dst = NULL,        # this check never modifies the dataset
    ret_log = ret_log, # number of too-short answers per enumerator and question
    var = NULL,        # no scalar value produced by this check
    graph = NULL       # no graphical output produced by this check
  )
  return(result)
}