forked from PYannick/HighFrequencyChecks
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathisuniquerespondantidmissing.R
110 lines (94 loc) · 5.15 KB
/
isuniquerespondantidmissing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# WARNING - Generated by {fusen} from dev/function_documentation.Rmd: do not edit by hand
#' @name isuniquerespondantIDMissing
#' @rdname isuniquerespondantIDMissing
#' @title Missing unique ID
#' @description This function check that all interviews in the dataset have an ID.
#' There is an option to automatically mark for deletion the surveys which have not an ID.
#'
#' @param ds dataset containing the survey (from kobo): labelled data.frame
#' @param uniquerespondantID name of the field where the survey unique ID is stored: string
#' @param surveyConsent name of the field in the dataset where the survey consent is stored: string
#' @param consentForValidSurvey value defined in the kobo form to acknowledge the surveyed person gave his consent: string
#' @param enumeratorID name of the field where the enumerator ID is stored: string
#' @param reportingColumns (Optional, by default it is built from the enumeratorID and the uniquerespondantID) name of the columns from the dataset you want in the result: list of string (c('col1','col2',...))
#' @param deleteIsuniquerespondantIDMissing (Optional, by default set as FALSE) if TRUE, the survey in error will be marked as 'deletedIsuniquerespondantIDMissing': boolean (TRUE/FALSE)
#'
#' @return result a list that includes:
#' * dst same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
#' * ret_log list of the errors found (or NULL)
#' * var a list of value (or NULL)
#' * graph graphical representation of the results (or NULL)
#'
#' @export isuniquerespondantIDMissing
#' @examples
#'
#' load(system.file("sample_dataset.RData", package = "HighFrequencyChecks"))
#' ds <- sample_dataset
#' uniquerespondantID <- "X_uuid"
#' surveyConsent <- "survey_consent"
#' enumeratorID <- "enumerator_id"
#' reportingColumns <- c(enumeratorID, uniquerespondantID)
#'
#' result <- isuniquerespondantIDMissing(ds = ds,
#' uniquerespondantID=uniquerespondantID,
#' surveyConsent=surveyConsent,
#' reportingColumns=reportingColumns,
#' deleteIsuniquerespondantIDMissing=FALSE)
#' knitr::kable(head(result[["ret_log"]], 10))
#' print(result[["graph"]])
isuniquerespondantIDMissing <- function(ds=NULL,
uniquerespondantID=NULL,
surveyConsent=NULL,
reportingColumns=c(enumeratorID, uniquerespondantID),
deleteIsuniquerespondantIDMissing=FALSE)
{
if(is.null(ds) | nrow(ds)==0 | !is.data.frame(ds)){
stop("Please provide the dataset")
}
if(is.null(surveyConsent) | !is.character(surveyConsent)){
stop("Please provide the field where the survey consent is stored")
}
if(is.null(uniquerespondantID) | !is.character(uniquerespondantID)){
stop("Please provide the field where the survey unique ID is stored")
}
if(is.null(reportingColumns) | !is.character(reportingColumns)){
stop("Please provide the columns you want in the result (include the enumerator id column if you want to check by enumerator)")
}
if(is.null(deleteIsuniquerespondantIDMissing) | !is.logical(deleteIsuniquerespondantIDMissing)){
stop("Please provide the delete action to be done (TRUE/FALSE)")
}
if(deleteIsuniquerespondantIDMissing){
ds[,surveyConsent][is.na(ds[,uniquerespondantID])] <- "deletedIsuniquerespondantIDMissing"
}
# TO BE BE CHANGED WITH DYNAMIC COLUMS
ret_log <- subset(ds,is.na(ds[,uniquerespondantID]) | ds[,uniquerespondantID]=="") %>%
dplyr::select(all_of(reportingColumns), survey_consent=surveyConsent)
check <- data.frame(categories=c("OK", "NOK"),
Nb = c( length(ds[,1])-length(ret_log[,1]),
length(ret_log[,1])))
t1 <- check
t1$fraction <- t1$Nb / sum(t1$Nb)
t1 <- t1[order(t1$fraction), ]
t1$ymax <- cumsum(t1$fraction)
t1$ymin <- c(0, utils::head(t1$ymax, n=-1))
graph <- ggplot2::ggplot(t1, ggplot2::aes(fill=categories,
ymax=ymax,
ymin=ymin,
xmax=4,
xmin=3)) +
ggplot2::geom_rect(colour="grey30") +
ggplot2::coord_polar(theta="y") +
ggplot2::xlim(c(1, 4)) +
unhcrthemes::theme_unhcr(font_size = 14) +
ggplot2::theme(panel.grid=ggplot2::element_blank()) +
ggplot2::theme(axis.text=ggplot2::element_blank()) +
ggplot2::theme(axis.ticks=ggplot2::element_blank()) +
ggplot2::theme(legend.position='none') +
ggplot2::labs(title= "Are there unique respondantID Missing?",
subtitle = paste0("Errors: ", round(t1$fraction[t1$categories == 'NOK']*100,2), "%"))
result <- list( dst = NULL, # same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
ret_log = ret_log , # list of the errors found (or NULL)
var = NULL, # a list of value (or NULL)
graph = graph) # graphical representation of the results (or NULL)
return(result)
}