R/enumeratorislazy.R

# WARNING - Generated by {fusen} from dev/function_documentation.Rmd: do not edit by hand


#' @name enumeratorIsLazy
#' @rdname enumeratorIsLazy
#' @title Check the enumerators who pick up less than X answers per specific question
#' @description This function reports, for each enumerator and each checked question, the number of
#' surveys in which fewer than a specified amount of answers were picked.
#' This can be useful for select_multiple questions where the respondent shall give at least 3 options
#' for instance.
#'
#' @param ds dataset containing the survey (from kobo): labelled data.frame
#' @param enumeratorID name of the field where the enumerator ID is stored: string
#' @param questionsEnumeratorIsLazy columns name from the dataset and value you want to check against
#'  (c(col1=value1,col2=value2,...)): named vector (or named list) of integer.
#'  The column name is the main part of the name generated by kobo (eg: for the question 'main_income',
#'  kobo will generate one TRUE/FALSE column per possible answer as 'main_income.work',
#'  'main_income.remittance'..., only the main part 'main_income' has to be specified here).
#'  Every column whose name contains the given name immediately followed by a dot is counted as one
#'  possible answer of that question; the name is matched literally, not as a regular expression.
#'
#' @return result a list that includes:
#'   * dst      always NULL for this check
#'   * ret_log  data.frame with one row per enumerator and per checked question, with the columns
#'              `enumeratorID`, `NbErr` (number of surveys with fewer answers than required) and
#'              `field` (name of the checked question)
#'   * var      always NULL for this check
#'   * graph    always NULL for this check
#'
#' @export enumeratorIsLazy
#' @examples
#' load(system.file("sample_dataset.RData", package = "HighFrequencyChecks"))
#' ds <- sample_dataset
#' enumeratorID <- "enumerator_id"
#' questionsEnumeratorIsLazy <- c(consent_received.shelter_nfi.non_food_items=3,
#'                                 consent_received.food_security.main_income=3,
#'                                 consent_received.child_protection.boy_risk=3,
#'                                 consent_received.child_protection.girl_risk=3)
#'
#' result <- enumeratorIsLazy(ds = ds,
#'                            enumeratorID=enumeratorID,
#'                            questionsEnumeratorIsLazy=questionsEnumeratorIsLazy)
#'
#'  knitr::kable(head(result[["ret_log"]], 10))
#'
enumeratorIsLazy <- function(ds = NULL,
                             enumeratorID = NULL,
                             questionsEnumeratorIsLazy = NULL) {
  # `||` short-circuits: nrow() is only evaluated once ds is known to be a
  # data.frame, so a NULL or non-data.frame input reaches the intended message.
  if (is.null(ds) || !is.data.frame(ds) || nrow(ds) == 0) {
    stop("Please provide the dataset")
  }
  if (is.null(enumeratorID) || !is.character(enumeratorID)) {
    stop("Please provide the field where the enumerator ID is stored")
  }
  if (is.null(questionsEnumeratorIsLazy)) {
    stop("Please provide the fields you want to check for (c('field1[.]','field2[.]',...))")
  }
  if (length(enumeratorID) != 1 || !enumeratorID %in% colnames(ds)) {
    stop("The enumerator ID field must be the name of one column of the dataset")
  }
  fields <- names(questionsEnumeratorIsLazy)
  if (is.null(fields) || any(is.na(fields) | fields == "")) {
    stop("Please name every value of questionsEnumeratorIsLazy after the question to check (c(field1=value1,...))")
  }

  # Enumerator groups are computed once: sorted distinct IDs (NA last) and, for
  # each survey, the position of its enumerator in that sorted set.
  ids <- ds[[enumeratorID]]
  enumerators <- ids[!duplicated(ids)]
  enumerators <- enumerators[order(enumerators)]
  group_index <- match(ids, enumerators)
  column_names <- colnames(ds)

  # One data.frame per checked question; seq_along() visits every question.
  per_field <- lapply(seq_along(questionsEnumeratorIsLazy), function(i) {
    field <- fields[i]
    threshold <- questionsEnumeratorIsLazy[[i]]

    # Answer columns are the ones containing "<field>." (literal match).
    answer_columns <- grepl(paste0(field, "."), column_names, fixed = TRUE)
    if (!any(answer_columns)) {
      # Without any answer column every survey counts 0 answers and is flagged.
      warning("No column found in the dataset for the question '", field, "'",
              call. = FALSE)
    }

    # drop = FALSE keeps a data.frame even when a single column matches.
    nb <- rowSums(ds[, answer_columns, drop = FALSE], na.rm = TRUE)

    data.frame(
      enumeratorID = enumerators,
      # Number of surveys below the threshold, counted per enumerator.
      NbErr = tabulate(group_index[nb < threshold], nbins = length(enumerators)),
      field = field,
      stringsAsFactors = FALSE
    )
  })
  ret_log <- do.call(rbind, per_field)

  list(
    dst = NULL,        # not produced by this check
    ret_log = ret_log, # list of the errors found
    var = NULL,        # not produced by this check
    graph = NULL       # not produced by this check
  )
}