Merge pull request #258 from Crunch-io/cbs_tracking_fixes

Fixes for tracking report edge cases
Crunch-io · Mar 29, 2021 · 60b5242 · 60b5242
2 parents 5151749 + e213f01
commit 60b5242
Show file tree

Hide file tree

Showing 13 changed files with 324 additions and 70 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,11 +2,11 @@ Package: crunchtabs
 Type: Package
 Title: Custom Report Generation for Crunch Datasets
 Description: In order to generate custom survey reports, this package provides
-    functions for computing 'toplines' (one-way frequency summaries) and
-    'banners' (cross-tabulations) of datasets in the Crunch
+    functions for computing 'toplines' (one-way frequency summaries), 
+    'banners' (cross-tabulations) and codebooks of datasets in the Crunch
     (<https://crunch.io/>) web service. Reports can be written in 'PDF' format
     using 'LaTeX' or in Microsoft Excel '.xlsx' files.
-Version: 1.4.0
+Version: 1.4.1
 Authors@R: c(
     person("Persephone", "Tsebelis", role="aut"),
     person("Kamil", "Sedrowicz", role="aut"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,17 @@
+## crunchtabs 1.4.1
+
+- Multiple adjustments for tracking report edge cases
+- In a tracking report, if an alias appears only once we now specify that by appending the wave label to the question's note.
+- In a tracking report, we now have the option to show a variable only once even if it appears in multiple datasets.
+
+Fixes:
+
+- In some cases there are duplicated row names in different positions when we use cbindFill, we did not account for this possibility.
+- When creating a tracking report for a multiple response question, it's possible that one or more of the responses is not included in one or more of the waves, we have added a tryCatch to accomodate this possibility.
+- themeNew documentation was missing a closing } that silently broke the display of documentation via ?themeNew.
+- When converting MR variables we rename subVars using numbers. If a researcher included an alias with the same number they would collide. Now we assign random strings instead of sequential numbers to the subVars.
+
+
 ## crunchtabs 1.4.0
 
 Features:

diff --git a/R/asToplineCategoricalArray.R b/R/asToplineCategoricalArray.R
@@ -28,36 +28,6 @@ as.ToplineCategoricalArray <- function(
     )
   }
 
-  categoryFill <- function(clist) {
-    cbindFill <- function(x, y) {
-      r <- merge(x, y, by = "row.names", all = TRUE, sort = FALSE)
-      rownames(r) <- r$Row.names
-      r$Row.names <- NULL
-      r
-    }
-
-    addPos <- function(x) {
-      x[,1] <- 1:nrow(x)
-      x
-    }
-
-    r <- lapply(clist, addPos)
-    r <- do.call(rbind, r)
-    r <- data.frame(nm = names(r[,1]), pos = r[,1])
-    r <- unique(r)
-    rownames(r) <- r$nm
-    r$nm <- NULL
-
-    m <- Reduce(function(x,y) suppressWarnings(cbindFill(x,y)), clist)
-    m <- suppressWarnings(merge(m, r, by = "row.names", all = TRUE, sort = FALSE))
-    m <- m[with(m, order(pos)),]
-
-    rownames(m) <- m$Row.names
-    m$Row.names <- NULL
-    m$pos <- NULL
-    as.matrix(m)
-  }
-
   counts <- obj$crosstabs$Results$`___total___`$counts
   second_label <- attr(counts, "dimnames")[[1]]
 
@@ -111,7 +81,14 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {
     labels <- paste0("Wave ", seq_along(questions))
   }
 
-  nms <- paste0(question_alias, seq_along(statements))
+  # Collisions can occur so we randomize names because people version aliases sometimes
+  # votefactors with sub vars + votefactors2 :/
+  # randNames <- function(n) {
+  #   a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
+  #   paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
+  # }
+
+  nms <- paste0(question_alias, "_", seq_along(statements))
 
   # Create list of objects to fill in, one for each sub statement of the
   # multiple response group
@@ -137,11 +114,13 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {
 
   # Pull out our data
   for (i in seq_len(nrow(guide))) {
-    guide$value[i] <- questions[[
-    guide$label[i]
-    ]]$crosstabs$Results$`___total___`$proportions[
-      guide$cat[i], guide$statement[i]
-    ]
+    guide$value[i] <- tryCatch({
+       questions[[
+        guide$label[i]
+      ]]$crosstabs$Results$`___total___`$proportions[
+        guide$cat[i], guide$statement[i]
+      ]
+    }, error = function(e) NA_real_)
   }
 
   # Pre allocate
@@ -175,3 +154,50 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {
   names(l) <- nms
   return(l)
 }
+
+#' Merge two data.frames by rownames
+#'
+#' This function is designed to cbind via rownames where
+#' the rownames may not match and then fix the result so that
+#' it can be further merged to another data.frame.
+#' @param x A data.frame
+#' @param y A data.frame
+cbindFill <- function(x, y) {
+  r <- merge(x, y, by = "row.names", all = TRUE, sort = FALSE)
+  rownames(r) <- r$Row.names
+  r$Row.names <- NULL
+  r
+}
+
+#' Add position
+#'
+#' A small utility function to add position to a matrix
+#' @param x A matrix
+addPos <- function(x) {
+  x[,1] <- 1:nrow(x)
+  x
+}
+
+#' Column Bind Unequal Matrices
+#'
+#' This function takes a list of matrices and binds them together into
+#' a single frame. Accounts for missing or unequal rows, by rowname.
+#'
+#' @param clist A list of matrices with rownames
+categoryFill <- function(clist) {
+  r <- lapply(clist, addPos)
+  r <- do.call(rbind, r)
+  r <- data.frame(nm = names(r[,1]), pos = r[,1])
+  r <- r[!duplicated(r$nm),]
+  rownames(r) <- r$nm
+  r$nm <- NULL
+
+  m <- Reduce(function(x,y) suppressWarnings(cbindFill(x,y)), clist)
+  m <- suppressWarnings(merge(m, r, by = "row.names", all = TRUE, sort = FALSE))
+  m <- m[with(m, order(pos)),]
+
+  rownames(m) <- m$Row.names
+  m$Row.names <- NULL
+  m$pos <- NULL
+  as.matrix(m)
+}
diff --git a/R/theme.R b/R/theme.R
@@ -3,7 +3,6 @@
 #' `themeNew` produces themes for `writeExcel` or `writeLatex`.
 #'
 #' @section Theme Arguments:
-# nolint start
 #' \describe{
 #' \item{digits}{A numeric. How many digits should the data be rounded to? (In Excel, this is excel styling.) Defaults to 0.}
 #' \item{digits_numeric}{A numeric. How many digits should continuous variable data be rounded to? (In Latex, , this is Latex styling.) Defaults to 2.}
@@ -82,7 +81,6 @@
 #' \item{border_color}{In Excel, an optional color. The border color of the relevant cells.}
 #' \item{border_left}{In Excel, an optional logical. Should there be a border on the left of the relevant cells? }
 #' \item{border_right}{In Excel, an optional logical. Should there be a border on the right of the relevant cells? }
-#' \item{border_style}{In Excel, an optional character. The style of the border of the relevant
 #' \item{border_style}{In Excel, an optional character. The style of the border of the relevant cells. Valid options are: "dashDot", "dashDotDot", "dashed", "dotted", "double", "hair", "medium", "mediumDashDot", "mediumDashDotDot", "mediumDashed", "none", "slantDashDot", "thick", and "thin".}
 #' \item{border_top}{In Excel, an optional logical. Should there be a border on the top of the relevant cells? }
 #' \item{decoration}{An optional character vector. Text decorations to be applied to relevant cells. Valid options are: "bold", "italic", "strikeout", "underline", and "underline2".}
@@ -153,8 +151,6 @@ themeNew <- function(..., default_theme = themeDefaultExcel()) {
     }
   }
 
-  # nolint end
-
   theme <- modifyList(default_theme, dots, keep.null = TRUE)
   theme <- theme[union(names(dots), names(default_theme))]
 

diff --git a/R/trackingReports.R b/R/trackingReports.R
@@ -31,7 +31,8 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
   rebuilt_results <- list()
   class(rebuilt_results) <- c("Toplines", "CrunchTabs")
   rebuilt_results$results <- lapply(vars, function(x) NULL)
-  rebuilt_results$metadata <- tabs[[1]]$metadata
+  has_meta <- which(!unlist(lapply(lapply(tabs, function(x) x$metadata), is.null)))[1]
+  rebuilt_results$metadata <- tabs[[has_meta]]$metadata
   names(rebuilt_results$results) <- vars
   rebuilt_results$banner <- NULL
 
@@ -59,26 +60,10 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
     }
 
     rebuilt_results$results[[v]]$available_at <- results_available
+    rebuilt_results <- trackingDeclareAvailability(
+      rebuilt_results, results_available, var = v, labels
+    )
 
-
-    # For each alias, we set an attribute that identifies it's availability
-    # across all the datasets: "all", "partial", and "single"
-    # - "all" means it is available in every dataset
-    # - "partial" means it is available in only some datasets
-    # - "single" means it is available in exactly one dataset
-
-    # Because we use subsetting at the list level, "all" and "partial"
-    # would follow a typical path that labeling was adjusted appropriately
-    # for presentation in the resulting pdf "single" should act as a simple
-    # passthrough where no additional formatting or manipulation takes place
-    # on the result.
-
-    # The single case
-    if (length(results_available) == 1) {
-      rebuilt_results$results[[v]]$availability <- "single"
-    } else {
-      rebuilt_results$results[[v]]$availability <- "general"
-    }
   }
 
   # Now that we have an attribute that identifies availability we can use it as
@@ -132,7 +117,39 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
   rebuilt_results
 }
 
+#' Specify question availability in a tracking report
 #'
+#' For each alias, we set an attribute that identifies it's availability
+#' across all the datasets: "general", and "single"
+#' - "general" means it is available in only some datasets
+#' - "single" means it is available in exactly one dataset
+#' Because we use subsetting at the list level, "general" and "single"
+#' would follow a typical path that labeling was adjusted appropriately
+#' for presentation in the resulting pdf "single" should act as a simple
+#' passthrough where no additional formatting or manipulation takes place
+#' on the result.
+#' @md
+#' @param rebuilt_results A list of result objects from crunch
+#' @param results_available A vector identifying in which list elements
+#' @param var The name of the alias that we are declaring its availability
+#' @param labels The wave labels
+trackingDeclareAvailability <- function(rebuilt_results, results_available, var, labels) {
+  if (length(results_available) == 1) {
+    rebuilt_results$results[[var]]$availability <- "single"
+    if(rebuilt_results$results[[var]]$notes == "") {
+      rebuilt_results$results[[var]]$notes <- paste0("Asked in ", labels[results_available])
+    } else {
+      rebuilt_results$results[[var]]$notes <- paste0(
+        rebuilt_results$results[[var]]$notes,
+        " (Asked in ", labels[results_available], ")")
+    }
+  } else {
+    rebuilt_results$results[[var]]$availability <- "general"
+  }
+  return(rebuilt_results)
+}
+
+
 trackingReport_tabs <- function(datasets, vars, weight = NULL) {
   lapply(
     datasets,

diff --git a/man/addPos.Rd b/man/addPos.Rd
diff --git a/man/categoryFill.Rd b/man/categoryFill.Rd
diff --git a/man/cbindFill.Rd b/man/cbindFill.Rd
diff --git a/man/crunchtabs-package.Rd b/man/crunchtabs-package.Rd