forked from PYannick/HighFrequencyChecks
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathisinterviewatthesamplepoint.R
165 lines (160 loc) · 8.61 KB
/
isinterviewatthesamplepoint.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# WARNING - Generated by {fusen} from dev/function_documentation.Rmd: do not edit by hand
#' @name isInterviewAtTheSamplePoint
#' @rdname isInterviewAtTheSamplePoint
#' @title GIS check surveys if fall without Xm radius from a sampled point
#' @description This function check that all interviews in the dataset were made within a distance from a sampled point.
#' It is based on a GIS shapefile providing the sample points for the assessment.
#' The function is based on the GPS data filled in the survey to determine their location.
#' There is an option to automatically mark for deletion the surveys which are to far away from a sampled point.
#'
#' One internal function "make_GeodesicBuffer" used to create the buffers is created by Valentin
#' https://stackoverflow.com/users/5193830/valentin
#' @param ds dataset containing the survey (from kobo): labelled data.frame
#' @param checkperiod if not null number of day before today when the check should be made
#' @param dsCoordinates name of the fields from the dataset where the information about the GPS coordinates are stored: list of string (c('Long','Lat'))
#' @param sampledPoints dataset containing the shapefile of the households sampled - Regardless the projection used for the shapefile, it is transformed to WGS84
#' @param buffer value in meter to determine the buffer from a sampled point which is acceptable: integer
#' @param surveyConsent name of the field in the dataset where the survey consent is stored: string
#' @param consentForValidSurvey value defined in the kobo form to acknowledge the surveyed person gave his consent: string
#' @param uniquerespondantID name of the field where the survey unique ID is stored: string
#' @param enumeratorID name of the field where the enumerator ID is stored: string
#' @param reportingColumns (Optional, by default it is built from the enumeratorID and the uniquerespondantID) name of the columns from the dataset you want in the result: list of string (c('col1','col2',...))
#' @param deleteIsInterviewAtTheSamplePoint (Optional, by default set as FALSE) if TRUE, the survey in error will be marked as 'deletedIsInterviewAtTheSamplePoint': boolean (TRUE/FALSE)
#'
#' @return result a list that includes:
#' * dst same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
#' * ret_log list of the errors found (or NULL)
#' * var a list of value (or NULL)
#' * graph graphical representation of the results (or NULL)
#'
#' @export isInterviewAtTheSamplePoint
#' @examples
#' load(system.file("sample_dataset.RData", package = "HighFrequencyChecks"))
#' ds <- sample_dataset
#' load(system.file("SamplePts.RData", package = "HighFrequencyChecks"))
#' sampledPoints <- SamplePts
#'
#' dsCoordinates <- c("X_gps_reading_longitude","X_gps_reading_latitude")
#' buffer <- 10
#' surveyConsent <- "survey_consent"
#' uniquerespondantID <- "X_uuid"
#' enumeratorID <- "enumerator_id"
#' reportingColumns <- c(enumeratorID, uniquerespondantID)
#'
#' # result <- isInterviewAtTheSamplePoint(ds = ds,
#' # dsCoordinates = dsCoordinates,
#' # sampledPoints=sampledPoints,
#' # buffer=buffer,
#' # surveyConsent=surveyConsent,
#' # reportingColumns=reportingColumns,
#' # deleteIsInterviewAtTheSamplePoint=FALSE)
#' # knitr::kable(head(result[["ret_log"]], 10))
#' # print(result[["graph"]])
isInterviewAtTheSamplePoint <- function(ds=NULL,
dsCoordinates=NULL,
sampledPoints=NULL,
buffer=10,
surveyConsent=NULL,
reportingColumns=c(enumeratorID, uniquerespondantID),
deleteIsInterviewAtTheSamplePoint=FALSE) {
# if(is.null(sampledPoints) | !isS4(sampledPoints) | nrow(sampledPoints)==0){
# stop("Please provide the spatial dataset of the sample points shapefile")
# }
# if(is.null(ds) | nrow(ds)==0 | !is.data.frame(ds)){
# stop("Please provide the dataset")
# }
# if(is.null(dsCoordinates) | !is.character(dsCoordinates) | length(dsCoordinates)!=2){
# stop("Please provide the fields where the coordinates are stored (c('Long','Lat'))")
# }
# if(is.null(buffer) | !is.numeric(buffer)){
# stop("Please provide the buffer in meters")
# }
# if(is.null(surveyConsent) | !is.character(surveyConsent)){
# stop("Please provide the field where the survey consent is stored")
# }
# if(is.null(reportingColumns) | !is.character(reportingColumns)){
# stop("Please provide the columns you want in the result (include the enumerator id column if you want to check by enumerator)")
# }
# if(is.null(deleteIsInterviewAtTheSamplePoint) | !is.logical(deleteIsInterviewAtTheSamplePoint)){
# stop("Please provide the delete action to be done (TRUE/FALSE)")
# }
#
# # function made by Valentin: https://stackoverflow.com/users/5193830/valentin
# make_GeodesicBuffer <- function(pts, width) {
# # A) Construct buffers as points at given distance and bearing
# dg <- seq(from = 0, to = 360, by = 5)
# # Construct equidistant points defining circle shapes (the "buffer points")
# buff.XY <- geosphere::destPoint(p = pts,
# b = rep(dg, each = length(pts)),
# d = width)
# # B) Make SpatialPolygons
# # Group (split) "buffer points" by id
# buff.XY <- as.data.frame(buff.XY)
# id <- rep(1:dim(pts)[1], times = length(dg))
# lst <- split(buff.XY, id)
# # Make Spatial Polygons out of the list of coordinates
# poly <- lapply(lst, sp::Polygon, hole = FALSE)
# polys <- lapply(list(poly), sp::Polygons, ID = NA)
# spolys <- sp::SpatialPolygons(Srl = polys,
# proj4string = sp::CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))
# # Disaggregate (split in unique polygons)
# spolys <- sp::disaggregate(spolys)
# return(spolys)
# }
#
# if(sp::is.projected(sampledPoints)){
# sampledPoints <- sp::spTransform(sampledPoints, sp::CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))
# }
# bufferSHP <- make_GeodesicBuffer(as.matrix(data.frame(lon=sampledPoints$coords.x1,lat=sampledPoints$coords.x2)),buffer)
#
# dfsp <-ds
# sp::coordinates(dfsp) <- dfsp[,c(dsCoordinates[1],dsCoordinates[2])]
# sp::proj4string(dfsp) <- sp::proj4string(bufferSHP)
# dfsp_over_buffer <- sp::over(dfsp,bufferSHP)
# fm <- data.frame(ds,dfsp_over_buffer, stringsAsFactors = FALSE)
#
# fm$Outside <- ifelse(is.na(fm$dfsp_over_buffer),"NOk","Ok")
# if(deleteIsInterviewAtTheSamplePoint){
# ds[,surveyConsent][fm$Outside=="NOk"]<-"deletedIsInterviewAtTheSamplePoint"
# }
#
# ret_log <- subset(fm, Outside=="NOk") %>%
# dplyr::select(all_of(reportingColumns), Outside=Outside)
#
# check <- data.frame(categories=c("OK", "NOK"),
# Nb = c( length(ds[,1])-length(ret_log[,1]),
# length(ret_log[,1])))
#
# t1 <- check
# t1$fraction <- t1$Nb / sum(t1$Nb)
# t1 <- t1[order(t1$fraction), ]
# t1$ymax <- cumsum(t1$fraction)
# t1$ymin <- c(0, utils::head(t1$ymax, n=-1))
#
# graph <- ggplot2::ggplot(t1, ggplot2::aes(fill=categories,
# ymax=ymax,
# ymin=ymin,
# xmax=4,
# xmin=3)) +
# ggplot2::geom_rect(colour="grey30") +
# ggplot2::coord_polar(theta="y") +
# ggplot2::xlim(c(1, 4)) +
# unhcrthemes::theme_unhcr(font_size = 14) +
# ggplot2::theme(panel.grid=ggplot2::element_blank()) +
# ggplot2::theme(axis.text=ggplot2::element_blank()) +
# ggplot2::theme(axis.ticks=ggplot2::element_blank()) +
# ggplot2::theme(legend.position='none') +
# ggplot2::labs(title= "Are interviews taking place in the expected site?",
# subtitle = paste0("Errors: ", round(t1$fraction[t1$categories == 'NOK']*100,2), "%"))
#
#
#
#
# result <- list( dst = NULL, # same dataset as the inputed one but with survey marked for deletion if errors are found and delete=TRUE (or NULL)
# ret_log = ret_log , # list of the errors found (or NULL)
# var = NULL, # a list of value (or NULL)
# graph = graph) # graphical representation of the results (or NULL)
#
#
# return(result)
}