forked from PYannick/HighFrequencyChecks
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathissurveystartedbeforetheassessment.R
116 lines (102 loc) · 6.26 KB
/
issurveystartedbeforetheassessment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# WARNING - Generated by {fusen} from dev/function_documentation.Rmd: do not edit by hand
#' @name isSurveyStartedBeforeTheAssessment
#' @rdname isSurveyStartedBeforeTheAssessment
#' @title Surveys that show start date earlier than first day of data collection
#' @description This function check that all interviews in the dataset start after the actual first day of data collection.
#' There is an option to automatically mark for deletion the surveys which have started before the first day of data collection.
#' @param ds dataset containing the survey (from kobo): labelled data.frame
#' @param checkperiod if not null number of day before today when the check should be made
#' @param dates name of the fields where the information about the start and end date of the survey is stored: list of string (c('start_date','end_date'))
#' @param surveyConsent name of the field in the dataset where the survey consent is stored: string
#' @param consentForValidSurvey value defined in the kobo form to acknowledge the surveyed person gave his consent: string
#' @param startDataCollection date of the first day of the data collection: string ('yyyy-mm-dd')
#' @param uniquerespondantID name of the field where the survey unique ID is stored: string
#' @param enumeratorID name of the field where the enumerator ID is stored: string
#' @param reportingColumns (Optional, by default it is built from the enumeratorID and the uniquerespondantID) name of the columns from the dataset you want in the result: list of string (c('col1','col2',...))
#' @param deleteIsSurveyStartedBeforeTheAssessment (Optional, by default set as FALSE) if TRUE, the survey in error will be marked as 'deletedIsSurveyStartedBeforeTheAssessment': boolean (TRUE/FALSE)
#'
#' @return result a list that includes:
#' * dst same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
#' * ret_log list of the errors found (or NULL)
#' * var a list of value (or NULL)
#' * graph graphical representation of the results (or NULL)
#'
#' @export isSurveyStartedBeforeTheAssessment
#' @examples
#' load(system.file("sample_dataset.RData", package = "HighFrequencyChecks"))
#' ds <- sample_dataset
#' dates <- c("survey_start","end_survey")
#' surveyConsent <- "survey_consent"
#' startDataCollection <- "2018-11-11"
#' uniquerespondantID <- "X_uuid"
#' enumeratorID <- "enumerator_id"
#' reportingColumns <- c(enumeratorID, uniquerespondantID)
#'
#' result <- isSurveyStartedBeforeTheAssessment(
#' ds = ds,
#' dates=dates,
#' surveyConsent=surveyConsent,
#' startDataCollection=startDataCollection,
#' reportingColumns=reportingColumns,
#' deleteIsSurveyStartedBeforeTheAssessment = FALSE)
#' knitr::kable(head(result[["ret_log"]], 10))
#' print(result[["graph"]])
isSurveyStartedBeforeTheAssessment <- function(ds = NULL,
dates = NULL,
surveyConsent = NULL,
startDataCollection = NULL,
reportingColumns=c(enumeratorID, uniquerespondantID),
deleteIsSurveyStartedBeforeTheAssessment = FALSE){
if(is.null(ds) | nrow(ds)==0 | !is.data.frame(ds)){
stop("Please provide the dataset")
}
if(is.null(surveyConsent) | !is.character(surveyConsent)){
stop("Please provide the field where the survey consent is stored")
}
if(is.null(dates) | !is.character(dates) | length(dates)!=2){
stop("Please provide the fields where the survey start and end date is stored (c('start_date','end_date'))")
}
if(is.null(startDataCollection) | !is.character(startDataCollection)){
stop("Please provide the date when the data collection began ('yyyy-mm-dd')")
}
if(is.null(reportingColumns) | !is.character(reportingColumns)){
stop("Please provide the columns you want in the result (include the enumerator id column if you want to check by enumerator)")
}
if(is.null(deleteIsSurveyStartedBeforeTheAssessment) | !is.logical(deleteIsSurveyStartedBeforeTheAssessment)){
stop("Please provide the delete action to be done (TRUE/FALSE)")
}
if(deleteIsSurveyStartedBeforeTheAssessment){
# ds[,survey_consent][start_collection > stringi::stri_datetime_format(readr::parse_datetime(ds[[dates[[1]]]]),"uuuu-MM-dd")]<-"deleted"
ds[,surveyConsent][startDataCollection>stringi::stri_datetime_format(strptime(ds[[dates[[1]]]], "%Y-%m-%dT%H:%M:%OS"),"uuuu-MM-dd")]<-"deletedIsSurveyStartedBeforeTheAssessment"
}
ret_log <- subset(ds,startDataCollection > stringi::stri_datetime_format(readr::parse_datetime(ds[[dates[[1]]]]),"uuuu-MM-dd")) %>%
dplyr::select(reportingColumns, survey_start=dates[1])
check <- data.frame(categories=c("OK", "NOK"),
Nb = c( length(ds[,1])-length(ret_log[,1]),
length(ret_log[,1])))
t1 <- check
t1$fraction <- t1$Nb / sum(t1$Nb)
t1 <- t1[order(t1$fraction), ]
t1$ymax <- cumsum(t1$fraction)
t1$ymin <- c(0, utils::head(t1$ymax, n=-1))
graph <- ggplot2::ggplot(t1, ggplot2::aes(fill=categories,
ymax=ymax,
ymin=ymin,
xmax=4,
xmin=3)) +
ggplot2::geom_rect(colour="grey30") +
ggplot2::coord_polar(theta="y") +
ggplot2::xlim(c(1, 4)) +
unhcrthemes::theme_unhcr(font_size = 14) +
ggplot2::theme(panel.grid=ggplot2::element_blank()) +
ggplot2::theme(axis.text=ggplot2::element_blank()) +
ggplot2::theme(axis.ticks=ggplot2::element_blank()) +
ggplot2::theme(legend.position='none') +
ggplot2::labs(title= "Is there any survey interview starting before official first data collection day?",
subtitle = paste0("Errors: ", round(t1$fraction[t1$categories == 'NOK']*100,2), "%"))
result <- list( dst = ds, # same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
ret_log = ret_log , # list of the errors found (or NULL)
var = NULL, # a list of value (or NULL)
graph = graph) # graphical representation of the results (or NULL)
return(result)
}